def PlotMultipleRuns(Alg, nruns=20, fname=None):
'''Plot "nruns" runs of a given algorithm to show performance
and variability across runs.'''
if fname:
runs = scipy.genfromtxt(fname)
else:
runs = []
for i in range(nruns):
bestSol, fitHistory = tsp.TSP(200, Alg, 3000, 30, seed=None,
coordfile='tmp.txt')
runs.append(fitHistory)
fname = 'MultRuns-' + str(Alg) + '.txt'
runs = scipy.array(runs)
scipy.savetxt(fname, runs)
# plotting
Xs = scipy.linspace(0, runs.shape[1] * 1000, runs.shape[1])
for i in range(runs.shape[0]):
pl.plot(Xs, runs[i, :])
pl.show()
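
A minimal driver for the function above. This is a sketch only: it assumes the project-local tsp module is importable, and the algorithm identifiers ('GA', 'SA') are placeholders for whatever tsp.TSP actually accepts.

# Hypothetical usage: compare two algorithms across repeated runs, then
# replot one of them from its saved history file.
for alg in ['GA', 'SA']:              # algorithm names are assumptions
    PlotMultipleRuns(alg, nruns=10)
PlotMultipleRuns('GA', fname='MultRuns-GA.txt')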
def LocalCC(in_signal1, in_signal2, samp_rate, max_time_lag, zero_time,
sigma=None):
n_lags = 2 * int(max_time_lag * samp_rate)
cc_time_lags = sp.linspace(-n_lags/2/samp_rate,
n_lags/2/samp_rate,
n_lags, endpoint=True)
cc = local_CCr(in_signal1, in_signal2,
max_time_lag, samp_rate, sigma)
lag, time = np.unravel_index(cc.argmax(), cc.shape)
time = zero_time + time/samp_rate
lag = lag/samp_rate - max_time_lag
arrival1 = time - lag/2.
arrival2 = time + lag/2.
return cc_time_lags, cc, arrival1, arrival2
def plot_pdf_fit(self, label=None):
import matplotlib.pyplot as plt
from scipy.stats import exponweib, rayleigh
from scipy import linspace, diff
plt.bar(self.pdf[1][:len(self.pdf[0])], self.pdf[0], width=diff(self.pdf[1]), label=label, alpha=0.5, color='k')
x = linspace(0, 50, 1000)
plt.plot(x, exponweib.pdf(x, a=self.weibull_params[0], c=self.weibull_params[1], scale=self.weibull_params[3]),
'b--', label='Exponential Weibull pdf')
plt.plot(x, rayleigh.pdf(x, scale=self.rayleigh_params[1]), 'r--', label='Rayleigh pdf')
plt.title('Normalized distribution of wind speeds')
plt.grid()
plt.legend()
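
A sketch of how the attributes this method reads might be prepared from a 1-D array of wind speeds. The attribute names mirror the method, but the setup itself is hypothetical:

import numpy as np
from scipy.stats import exponweib, rayleigh

speeds = np.abs(np.random.default_rng(0).normal(8, 4, 10000))
pdf = np.histogram(speeds, bins=50, density=True)   # (densities, bin_edges), as self.pdf
weibull_params = exponweib.fit(speeds, floc=0)      # (a, c, loc, scale)
rayleigh_params = rayleigh.fit(speeds, floc=0)      # (loc, scale); [1] is the scale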
import scipy as sp
from scipy.interpolate import interp1d

def bowtie_py(modis_img, p, cs):
    print('removing bowtie effect from image...')
    stripwidth = int(10000 / cs)  # strip height in pixels (10 km scan / cell size)
    # Loop over every x coordinate of the image
    for x in sp.arange(modis_img.shape[1]):
        # Loop over every scanning strip
        overlap = sp.polyval(p, x).round()  # get the overlap from the polynomial
        if overlap > 0:
            for y in sp.arange(stripwidth, modis_img.shape[0], stripwidth):
                # cut out the current part:
                strippart = modis_img[y:y + stripwidth, x]
                # delete the upper and lower few pixels of the strippart:
                strippart = strippart[int(overlap / 2.):-int(round(overlap / 2.))]
                # interpolate back to stripwidth length
                f = interp1d(sp.arange(0, len(strippart)), strippart)
                strippart = f(sp.linspace(0, len(strippart) - 1, stripwidth))
                # replace the current sick part in the image by the new healthy one
                modis_img[y:y + stripwidth, x] = strippart
    print('done')
    return modis_img
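
A quick smoke test under stated assumptions: p holds polynomial coefficients (highest power first, as sp.polyval expects) for the per-column overlap, and cs is the cell size in metres, so cs=1000 gives 10-row strips.

import numpy as np
img = np.tile(np.linspace(0, 1, 40)[:, None], (1, 8))   # 40 rows x 8 columns
fixed = bowtie_py(img.copy(), p=[0.0, 2.0], cs=1000)    # constant 2-pixel overlap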
# analyze_webstats.py, from the project Building-Machine-Learning-Systems-With-Python-Second-Edition (author: PacktPublishing)
def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
plt.figure(num=None, figsize=(8, 6))
plt.clf()
plt.scatter(x, y, s=10)
plt.title("Web traffic over the last month")
plt.xlabel("Time")
plt.ylabel("Hits/hour")
plt.xticks(
[w * 7 * 24 for w in range(10)], ['week %i' % w for w in range(10)])
if models:
if mx is None:
mx = sp.linspace(0, x[-1], 1000)
for model, style, color in zip(models, linestyles, colors):
# print "Model:",model
# print "Coeffs:",model.coeffs
plt.plot(mx, model(mx), linestyle=style, linewidth=2, c=color)
plt.legend(["d=%i" % m.order for m in models], loc="upper left")
plt.autoscale(tight=True)
plt.ylim(ymin=0)
if ymax:
plt.ylim(ymax=ymax)
if xmin:
plt.xlim(xmin=xmin)
plt.grid(True, linestyle='-', color='0.75')
plt.savefig(fname)
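
In the book this function is fed numpy poly1d fits of increasing degree, with linestyles and colors as module-level lists. A minimal sketch with stand-in data (the data and list values here are assumptions, not the book's):

import scipy as sp
linestyles = ['-', '-.', '--', ':']
colors = ['g', 'k', 'b', 'm']
x = sp.arange(1, 743)                  # one month of hourly samples
y = 100 + 2 * x + 0.01 * x ** 2        # stand-in traffic data
models = [sp.poly1d(sp.polyfit(x, y, deg)) for deg in (1, 2, 3)]
plot_models(x, y, models, 'traffic_fits.png')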
# assumes module-level imports: numpy as np, scipy as sp,
# and from math import floor, ceil
def local_CC(sig1, sig2, t_lag, fs):
    """
    Calculation of local cross-correlation after Hale (2006).

    Returns:
        c     - smoothed LCC (after Gaussian smoothing)
        h3    - non-smoothed LCC
        t_lag - array of lag times
    """
    l_max = int(t_lag * fs)
    h3 = np.zeros((2 * l_max, len(sig1)), sig1.dtype)
    c = np.zeros((2 * l_max, len(sig1)), sig1.dtype)
    t_lag = sp.linspace(-l_max / fs, l_max / fs, 2 * l_max, endpoint=False)
    for l in range(-l_max, l_max):
        l_f = int(floor(l / 2.))
        l_g = int(ceil(l / 2.))
        # __shift2 and Gaussian1D are helpers defined elsewhere in this module
        h3[l + l_max] = (__shift2(sig1, l_f) * __shift2(sig2, -l_g) +
                         __shift2(sig1, l_g) * __shift2(sig2, -l_f)) / 2
        c[l + l_max] = Gaussian1D(h3[l + l_max], 10, padding=0)
    # ---- not sure which formula for h3 is correct ----
    # h3[l+l_max] = (shift2(sig1, -l_f)*shift2(sig2, l_g) +
    #                shift2(sig1, -l_g)*shift2(sig2, l_f))/2
    return c, h3, t_lag
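
__shift2 and Gaussian1D are not shown on this page. The following is only a guess at __shift2's contract, written as a zero-padded sample shift:

import numpy as np

def __shift2(sig, n):
    # assumed behaviour: shift right by n samples (left for negative n),
    # zero-padding the vacated samples
    out = np.zeros_like(sig)
    if n > 0:
        out[n:] = sig[:-n]
    elif n < 0:
        out[:n] = sig[-n:]
    else:
        out[:] = sig
    return out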
def transform_3d(self, X):
X_resampled = sp.zeros((X.shape[0], self.n_samples, X.shape[2]))
xnew = sp.linspace(0, 1, self.n_samples)
for i in range(X.shape[0]):
end = last_index(X[i])
for j in range(X.shape[2]):
X_resampled[i, :, j] = resampled(X[i, :end, j], n_samples=self.n_samples, kind=self.interp_kind)
# Compute indices based on alignment of dimension self.scaling_col_idx with the reference
indices_xy = [[] for _ in range(self.n_samples)]
        if self.save_path and len(DTWSampler.saved_dtw_path) == (self.d + 1):
            # a full set of saved DTW paths already exists, so reuse this series' path
            current_path = DTWSampler.saved_dtw_path[i]
else:
# append path
current_path = dtw_path(X_resampled[i, :, self.scaling_col_idx], self.reference_series)
            if self.save_path:  # save the current path if asked to
DTWSampler.saved_dtw_path.append(current_path)
for t_current, t_ref in current_path:
indices_xy[t_ref].append(t_current)
for j in range(X.shape[2]):
            if False and j == self.scaling_col_idx:  # branch disabled in the original
X_resampled[i, :, j] = xnew
else:
ynew = sp.array([sp.mean(X_resampled[i, indices, j]) for indices in indices_xy])
X_resampled[i, :, j] = ynew
return X_resampled
def resampled(X, n_samples=100, kind="linear"):
xnew = sp.linspace(0, 1, n_samples)
f = interp1d(sp.linspace(0, 1, X.shape[0]), X, kind=kind)
return f(xnew)
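
A quick check of resampled, mapping a 50-point sine onto 100 samples (assuming interp1d is imported in the module above):

import scipy as sp
sig = sp.sin(sp.linspace(0, 2 * sp.pi, 50))
sig100 = resampled(sig, n_samples=100, kind="linear")
assert sig100.shape == (100,)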
def plot_price(smoothed_prices):
plot_over_map(10**(smoothed_prices - 3), norm=LogNorm(1.5e2, 1e3))
cb = plt.colorbar(fraction=0.03, ticks=sp.linspace(2e2, 1e3, 9), format=FormatStrFormatter(u'£%dk'))
cb.set_label(u'price paid (£1000s)')
plt.title('2015 Average Price Paid')
plt.gcf().set_size_inches(36, 36)
plt.gcf().savefig(os.path.join(OUTPUT_PATH, 'price_paid.png'), bbox_inches='tight')
def plot_time(walking_time):
plot_over_map(walking_time, vmin=15, vmax=75)
cb = plt.colorbar(fraction=0.03, ticks=sp.linspace(15, 75, 5))
cb.set_label('commute time (mins)')
plt.title('Commute time to Green Park')
plt.gcf().set_size_inches(36, 36)
plt.gcf().savefig(os.path.join(OUTPUT_PATH, 'travel_time.png'), bbox_inches='tight')
def plot_relative_price(relative_prices):
plot_over_map(10**relative_prices, norm=LogNorm(0.5, 2))
cb = plt.colorbar(fraction=0.03, ticks=sp.linspace(0.5, 2, 4), format=FormatStrFormatter('x%.2f'))
cb.set_label('fraction of average price paid for commute time')
plt.title('Price relative to commute')
plt.gcf().set_size_inches(36, 36)
plt.gcf().savefig(os.path.join(OUTPUT_PATH, 'relative_price.png'), bbox_inches='tight')
def test_continuous_non_gaussian(db_path, sampler):
def model(args):
return {"result": sp.rand() * args['u']}
models = [model]
models = list(map(SimpleModel, models))
population_size = ConstantPopulationSize(250)
    parameter_given_model_prior_distribution = [
        Distribution(u=RV("uniform", 0, 1))]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["result"]),
population_size,
eps=MedianEpsilon(.2),
sampler=sampler)
d_observed = .5
abc.new(db_path, {"result": d_observed})
abc.do_not_stop_when_only_single_model_alive()
minimum_epsilon = -1
history = abc.run(minimum_epsilon, max_nr_populations=2)
posterior_x, posterior_weight = history.get_distribution(0, None)
posterior_x = posterior_x["u"].as_matrix()
sort_indices = sp.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        sp.hstack((-200, posterior_x[sort_indices], 200)),
        sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))
@sp.vectorize
def f_expected(u):
return (sp.log(u)-sp.log(d_observed)) / (- sp.log(d_observed)) * \
(u > d_observed)
x = sp.linspace(0.1, 1)
max_distribution_difference = sp.absolute(f_empirical(x) -
f_expected(x)).max()
assert max_distribution_difference < 0.12
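
The closed form behind f_expected: the posterior density over u is proportional to 1/u on (d, 1), so the CDF is (log u - log d) / (-log d). A crude rejection-sampling check of this, separate from the test itself:

import numpy as np
rng = np.random.default_rng(0)
d = 0.5
u = rng.uniform(0, 1, 1000000)
r = rng.uniform(0, 1, 1000000)
post = u[np.abs(u * r - d) < 0.005]            # accept samples reproducing the data
grid = np.linspace(0.6, 0.9, 4)
emp = np.mean(post[:, None] <= grid, axis=0)   # empirical CDF at grid points
exact = (np.log(grid) - np.log(d)) / (-np.log(d))
print(np.abs(emp - exact).max())               # small (roughly 1e-2)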
def test_gaussian_multiple_populations(db_path, sampler):
sigma_x = 1
sigma_y = .5
y_observed = 2
def model(args):
return {"y": st.norm(args['x'], sigma_y).rvs()}
models = [model]
models = list(map(SimpleModel, models))
nr_populations = 4
population_size = ConstantPopulationSize(600)
parameter_given_model_prior_distribution = [Distribution(x=st.norm(0, sigma_x))]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["y"]),
population_size,
eps=MedianEpsilon(.2),
sampler=sampler)
abc.new(db_path, {"y": y_observed})
minimum_epsilon = -1
abc.do_not_stop_when_only_single_model_alive()
history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
posterior_x, posterior_weight = history.get_distribution(0, None)
posterior_x = posterior_x["x"].as_matrix()
sort_indices = sp.argsort(posterior_x)
f_empirical = sp.interpolate.interp1d(sp.hstack((-200, posterior_x[sort_indices], 200)),
sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))
sigma_x_given_y = 1 / sp.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
x = sp.linspace(-8, 8)
max_distribution_difference = sp.absolute(f_empirical(x) - expected_posterior_x.cdf(x)).max()
assert max_distribution_difference < 0.052
assert history.max_t == nr_populations-1
mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
assert abs(mean_emp - mu_x_given_y) < .07
assert abs(std_emp - sigma_x_given_y) < .12
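
The mu_x_given_y and sigma_x_given_y formulas above are standard Gaussian conjugacy with a zero-mean prior. A quick simulation sanity check, not part of the test:

import numpy as np
rng = np.random.default_rng(0)
x = rng.normal(0, 1.0, 2000000)        # prior draws (sigma_x = 1)
y = rng.normal(x, 0.5)                 # likelihood draws (sigma_y = 0.5)
post = x[np.abs(y - 2) < 0.01]         # crude ABC acceptance at y_observed = 2
print(post.mean(), post.std())         # approx. 1.6 and 0.447, as the formulas give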
def test_gaussian_multiple_populations_adaptive_population_size(db_path, sampler):
sigma_x = 1
sigma_y = .5
y_observed = 2
def model(args):
return {"y": st.norm(args['x'], sigma_y).rvs()}
models = [model]
models = list(map(SimpleModel, models))
nr_populations = 4
population_size = AdaptivePopulationSize(600)
parameter_given_model_prior_distribution = [
Distribution(x=st.norm(0, sigma_x))]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["y"]),
population_size,
eps=MedianEpsilon(.2),
sampler=sampler)
abc.new(db_path, {"y": y_observed})
minimum_epsilon = -1
abc.do_not_stop_when_only_single_model_alive()
history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
posterior_x, posterior_weight = history.get_distribution(0, None)
posterior_x = posterior_x["x"].as_matrix()
sort_indices = sp.argsort(posterior_x)
f_empirical = sp.interpolate.interp1d(sp.hstack((-200, posterior_x[sort_indices], 200)),
sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))
sigma_x_given_y = 1 / sp.sqrt(1 / sigma_x ** 2 + 1 / sigma_y ** 2)
mu_x_given_y = sigma_x_given_y ** 2 * y_observed / sigma_y ** 2
expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
x = sp.linspace(-8, 8)
max_distribution_difference = sp.absolute(f_empirical(x) - expected_posterior_x.cdf(x)).max()
assert max_distribution_difference < 0.15
assert history.max_t == nr_populations - 1
mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
assert abs(mean_emp - mu_x_given_y) < .07
assert abs(std_emp - sigma_x_given_y) < .12
def findPhi_s(self, beta):
    # Find the synchronous phase phi_s minimizing the integral of E_z0(s)*sin(...).
    # Assumes module-level imports: from scipy import constants, exp, sin, cos,
    # linspace, inf and from scipy.integrate import quad.
    z1z2 = -2 * self.halfnbrofoscillations * self.L / 3  # /3 since A = 0 and B != 0
    # print("z1z2: " + str(z1z2))
    z4z5 = 2 * self.halfnbrofoscillations * self.L / 3   # /3 since A = 0 and B != 0
    # print("z4z5: " + str(z4z5))
    Ithatgoeswithphi_s = inf
    phi_s = 0.0  # fallback in case no candidate improves on inf
    for i in linspace(0, 2 * constants.pi, 30):
        ## Integral over the three field regions
        # -inf to z1|z2
        I1 = quad(lambda s: self.amplitudeB * exp(((s + z1z2) / self.sigma)**self.p)
                  * sin(2 * constants.pi / (beta * self.rf_lambda) * s - i), -inf, z1z2)
        # z1|z2 to z4|z5
        I2 = quad(lambda s: (self.amplitudeA * cos(constants.pi * s / self.L)
                             + self.amplitudeB * cos(3 * constants.pi * s / self.L))
                  * sin(2 * constants.pi / (beta * self.rf_lambda) * s - i), z1z2, z4z5)
        # z4|z5 to inf
        I3 = quad(lambda s: self.amplitudeB * exp((-(s - z4z5) / self.sigma)**self.p)
                  * sin(2 * constants.pi / (beta * self.rf_lambda) * s - i), z4z5, inf)
        # sum up
        res = I1[0] + I2[0] + I3[0]
        if abs(res) < Ithatgoeswithphi_s:
            phi_s = i
            Ithatgoeswithphi_s = abs(res)
    print("Ithatgoeswithphi_s: " + str(Ithatgoeswithphi_s))
    return phi_s
def outlier_removed_fit(m, w=None, n_iter=10, polyord=7):
    """
    Remove outliers using fitted data.

    Args:
        m (:obj:`numpy array`): Phase curve.
        w (:obj:`numpy array`, optional): Weights; defaults to all ones.
        n_iter (:obj:`int`): Number of outlier-removal iterations.
        polyord (:obj:`int`): Order of the polynomial used.

    Returns:
        fit (:obj:`numpy array`): Curve with outliers removed.
    """
    if w is None:
        w = sp.ones_like(m)
    W = sp.diag(sp.sqrt(w))
    m2 = sp.copy(m)
    tv = sp.linspace(-1, 1, num=len(m))
    A = sp.zeros([len(m), polyord])
    for j in range(polyord):
        A[:, j] = tv**(float(j))
    A2 = sp.dot(W, A)
    m2w = sp.dot(m2, W)
    fit = None
    for i in range(n_iter):
        xhat = sp.linalg.lstsq(A2, m2w)[0]
        fit = sp.dot(A, xhat)
        # use gradient for central finite differences, which keeps the order
        resid = sp.gradient(fit - m2)
        std = sp.std(resid)
        bidx = sp.where(sp.absolute(resid) > 2.0 * std)[0]
        for bi in bidx:
            A2[bi, :] = 0.0
            m2[bi] = 0.0
            m2w[bi] = 0.0
    if debug_plot:  # module-level debug flag
        plt.plot(m2, label="outlier removed")
        plt.plot(m, label="original")
        plt.plot(fit, label="fit")
        plt.legend()
        # fit.min()/fit.max(), not sp.minimum/sp.maximum (those are binary ufuncs)
        plt.ylim([fit.min() - std * 3.0, fit.max() + std * 3.0])
        plt.show()
    return fit
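
A synthetic check of the function above. debug_plot is a module-level flag the function reads, set here explicitly (an assumption about the original module):

import numpy as np
debug_plot = False
t = np.linspace(-1, 1, 200)
curve = 3 * t ** 3 - t + 0.05 * np.random.default_rng(0).normal(size=200)
curve[[20, 90, 150]] += 10.0           # inject a few outliers
fit = outlier_removed_fit(curve, n_iter=5, polyord=4)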
def test_gaussian_multiple_populations_crossval_kde(db_path, sampler):
sigma_x = 1
sigma_y = .5
y_observed = 2
def model(args):
return {"y": st.norm(args['x'], sigma_y).rvs()}
models = [model]
models = list(map(SimpleModel, models))
nr_populations = 4
population_size = ConstantPopulationSize(600)
parameter_given_model_prior_distribution = [Distribution(x=st.norm(0, sigma_x))]
parameter_perturbation_kernels = [GridSearchCV(MultivariateNormalTransition(),
{"scaling": sp.logspace(-1, 1.5, 5)})]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["y"]),
population_size,
transitions=parameter_perturbation_kernels,
eps=MedianEpsilon(.2),
sampler=sampler)
abc.new(db_path, {"y": y_observed})
minimum_epsilon = -1
abc.do_not_stop_when_only_single_model_alive()
history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
posterior_x, posterior_weight = history.get_distribution(0, None)
posterior_x = posterior_x["x"].as_matrix()
sort_indices = sp.argsort(posterior_x)
f_empirical = sp.interpolate.interp1d(sp.hstack((-200, posterior_x[sort_indices], 200)),
sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))
sigma_x_given_y = 1 / sp.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
x = sp.linspace(-8, 8)
max_distribution_difference = sp.absolute(f_empirical(x) - expected_posterior_x.cdf(x)).max()
assert max_distribution_difference < 0.052
assert history.max_t == nr_populations-1
mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
assert abs(mean_emp - mu_x_given_y) < .07
assert abs(std_emp - sigma_x_given_y) < .12
def test_gaussian_single_population(db_path, sampler):
sigma_prior = 1
sigma_ground_truth = 1
observed_data = 1
def model(args):
return {"y": st.norm(args['x'], sigma_ground_truth).rvs()}
models = [model]
models = list(map(SimpleModel, models))
nr_populations = 1
population_size = ConstantPopulationSize(600)
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", 0, sigma_prior))]
abc = ABCSMC(models, parameter_given_model_prior_distribution,
MinMaxDistanceFunction(measures_to_use=["y"]),
population_size,
eps=MedianEpsilon(.1),
sampler=sampler)
abc.new(db_path, {"y": observed_data})
minimum_epsilon = -1
abc.do_not_stop_when_only_single_model_alive()
history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
posterior_x, posterior_weight = history.get_distribution(0, None)
posterior_x = posterior_x["x"].as_matrix()
sort_indices = sp.argsort(posterior_x)
f_empirical = sp.interpolate.interp1d(sp.hstack((-200, posterior_x[sort_indices], 200)),
sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))
sigma_x_given_y = 1 / sp.sqrt(1 / sigma_prior**2 + 1 / sigma_ground_truth**2)
mu_x_given_y = sigma_x_given_y**2 * observed_data / sigma_ground_truth**2
expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
x = sp.linspace(-8, 8)
max_distribution_difference = sp.absolute(f_empirical(x) - expected_posterior_x.cdf(x)).max()
assert max_distribution_difference < 0.12
assert history.max_t == nr_populations-1
mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
assert abs(mean_emp - mu_x_given_y) < .07
assert abs(std_emp - sigma_x_given_y) < .1