def freq_from_HPS(sig, fs):
"""
Estimate frequency using harmonic product spectrum (HPS)
"""
windowed = sig * blackmanharris(len(sig))
from pylab import subplot, plot, log, copy, show
# harmonic product spectrum:
c = abs(rfft(windowed))
maxharms = 3
#subplot(maxharms, 1, 1)
#plot(log(c))
for x in range(2, maxharms):
a = copy(c[::x]) # Should average or maximum instead of decimating
# max(c[::x],c[1::x],c[2::x],...)
c = c[:len(a)]
i = argmax(abs(c))
true_i = parabolic(abs(c), i)[0]
print 'Pass %d: %f Hz' % (x, fs * true_i / len(windowed))
c *= a
#subplot(maxharms, 1, x)
#plot(log(c))
#show()
python类plot()的实例源码
def view_trigger_snippets_bis(trigger_snippets, elec_index, save=None):
fig = pylab.figure()
ax = fig.add_subplot(1, 1, 1)
for n in xrange(0, trigger_snippets.shape[2]):
y = trigger_snippets[:, elec_index, n]
x = numpy.arange(- (y.size - 1) / 2, (y.size - 1) / 2 + 1)
b = 0.5 + 0.5 * numpy.random.rand()
ax.plot(x, y, color=(0.0, 0.0, b), linestyle='solid')
ax.grid(True)
ax.set_xlim([numpy.amin(x), numpy.amax(x)])
ax.set_xlabel("time")
ax.set_ylabel("amplitude")
if save is None:
pylab.show()
else:
pylab.savefig(save)
pylab.close(fig)
return
def display_results_figure(results, METRIC):
import pylab as pb
color = iter(pb.cm.rainbow(np.linspace(0, 1, len(results))))
plots = []
for method in results.keys():
x = []
y = []
for train_perc in sorted(results[method].keys()):
x.append(train_perc)
y.append(results[method][train_perc][0])
c = next(color)
(pi, ) = pb.plot(x, y, color=c)
plots.append(pi)
from matplotlib.font_manager import FontProperties
fontP = FontProperties()
fontP.set_size('small')
pb.legend(plots, map(method_name_mapper, results.keys()),
prop=fontP, bbox_to_anchor=(0.6, .65))
pb.xlabel('#Tweets from target rumour for training')
pb.ylabel('Accuracy')
pb.title(METRIC.__name__)
pb.savefig('incrementing_training_size.png')
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 30
收藏 0
点赞 0
评论 0
def portfolio_timestamp_period_with_most_highly_corr_assets(self, df):
# A first approximation to model portfolio returns:
# i) Find assets that correlates with y, where correlation is higher than a threshold value
# ii) Include only above assets and find maximum timestamp period with most assets
# iii) Transform target value y to be cumulative mean of y in order to obtain monotonic behaviour
# iv) Train model to predict transformed target value with the selected most correlated assets in selected
# timestamp interval
# v) Run model on test data and apply inverse transform to get target value y.
# From plot it looks like a lot of assets are bought and sold at first and last timestamp.
# We should of course primarily select assets based on how much they are correlated with y
correlation_coeffecients = self.correlation_coeffecients
names_of_assets = correlation_coeffecients.loc[correlation_coeffecients.index != 'y'].sort_values(
ascending=False).head(self.number_of_assets_in_portfolio).index
# Todo: make a check if any intermediate sales assets are among the most corr with y
return df.loc[:, names_of_assets]
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 22
收藏 0
点赞 0
评论 0
def predicted_vs_actual_y_xgb(self, xgb, best_nrounds, xgb_params, x_train_split, x_test_split, y_train_split,
y_test_split, title_name):
# Split the training data into an extra set of test
# x_train_split, x_test_split, y_train_split, y_test_split = train_test_split(x_train, y_train)
dtrain_split = xgb.DMatrix(x_train_split, label=y_train_split)
dtest_split = xgb.DMatrix(x_test_split)
print(np.shape(x_train_split), np.shape(x_test_split), np.shape(y_train_split), np.shape(y_test_split))
gbdt = xgb.train(xgb_params, dtrain_split, best_nrounds)
y_predicted = gbdt.predict(dtest_split)
plt.figure(figsize=(10, 5))
plt.scatter(y_test_split, y_predicted, s=20)
rmse_pred_vs_actual = self.rmse(y_predicted, y_test_split)
plt.title(''.join([title_name, ', Predicted vs. Actual.', ' rmse = ', str(rmse_pred_vs_actual)]))
plt.xlabel('Actual y')
plt.ylabel('Predicted y')
plt.plot([min(y_test_split), max(y_test_split)], [min(y_test_split), max(y_test_split)])
plt.tight_layout()
def display_wav(filename):
input_data = read(filename)
audio_in = input_data[1]
samples = len(audio_in)
fig = pylab.figure();
print samples/44100.0," seconds"
k = 0
plot_data_out = []
for i in xrange(samples):
plot_data_out.append(audio_in[k]/32768.0)
k = k+1
pdata = numpy.array(plot_data_out, dtype=numpy.float)
pylab.plot(pdata)
pylab.grid(True)
pylab.ion()
pylab.show()
def display_pr_curve(precision, recall):
# following examples from sklearn
# TODO: f1 operating point
import pylab as plt
# Plot Precision-Recall curve
plt.clf()
plt.plot(recall, precision, label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall example: Max f1={0:0.2f}'.format(max_f1))
plt.legend(loc="lower left")
plt.show()
def retrieve_bs(coeff_file, bs, ibands, cbm):
# sp=bs.bands.keys()[0]
engre, nwave, nsym, nstv, vec, vec2, out_vec2, br_dir = get_energy_args(coeff_file, ibands)
#you can use a for loop along a certain list of k-points.
for i, iband in enumerate(ibands):
en = []
sym_line_kpoints = [k.frac_coords for k in bs.kpoints]
for kpt in sym_line_kpoints:
e, v, m = get_energy(kpt, engre[i], nwave, nsym, nstv, vec, vec2=vec2, out_vec2=out_vec2, br_dir=br_dir, cbm=cbm)
en.append(e*13.605)
# plot(np.array(bs.bands[sp])[iband-1,:].T-bs.efermi) # from MP
# plot(np.array(bs.bands[sp])[iband-2,:].T-bs.efermi) # from MP
# plot(np.array(bs.bands[sp])[iband-3,:].T-bs.efermi) # from MP
plot(en, color='b') # interpolated by BoltzTraP
show()
def edgescatter(self, ps):
for ei,X in enumerate(self.edges):
i,j = X[:2]
matchdRA, matchdDec = X[10:12]
mu = X[9]
A = self.alignments[ei]
plt.clf()
if len(matchdRA) > 1000:
plothist(matchdRA, matchdDec, 101)
else:
plt.plot(matchdRA, matchdDec, 'k.', alpha=0.5)
plt.axvline(0, color='0.5')
plt.axhline(0, color='0.5')
plt.axvline(mu[0], color='b')
plt.axhline(mu[1], color='b')
for nsig in [1,2]:
X,Y = A.getContours(nsigma=nsig)
plt.plot(X, Y, 'b-')
plt.xlabel('delta-RA (arcsec)')
plt.ylabel('delta-Dec (arcsec)')
plt.axis('scaled')
ps.savefig()
def plotaffine(aff, RR, DD, exag=1000, affineOnly=False, doclf=True, **kwargs):
import pylab as plt
if doclf:
plt.clf()
if affineOnly:
dr,dd = aff.getAffineOffset(RR, DD)
else:
rr,dd = aff.apply(RR, DD)
dr = rr - RR
dd = dd - DD
#plt.plot(RR, DD, 'r.')
#plt.plot(RR + dr*exag, DD + dd*exag, 'bx')
plt.quiver(RR, DD, exag*dr, exag*dd,
angles='xy', scale_units='xy', scale=1,
pivot='middle', color='b', **kwargs)
#pivot='tail'
ax = plt.axis()
plt.plot([aff.getReferenceRa()], [aff.getReferenceDec()], 'r+', mew=2, ms=5)
plt.axis(ax)
esuf = ''
if exag != 1.:
esuf = ' (x %g)' % exag
plt.title('Affine transformation found' + esuf)
def PlotMultipleRuns(Alg, nruns=20, fname=None):
'''Plot "nruns" runs of a given algorithm to show performance
and variability across runs.'''
if fname:
runs = scipy.genfromtxt(fname)
else:
runs = []
for i in range(nruns):
bestSol, fitHistory = tsp.TSP(200, Alg, 3000, 30, seed=None,
coordfile='tmp.txt')
runs.append(fitHistory)
fname = 'MultRuns-' + str(Alg) + '.txt'
runs = scipy.array(runs)
scipy.savetxt(fname, runs)
# plotting
Xs = scipy.linspace(0, runs.shape[1] * 1000, runs.shape[1])
for i in range(runs.shape[0]):
pl.plot(Xs, runs[i, :])
pl.show()
def LongMC3(fname=None):
'''Plot a single long MC3 run to demonstrate high performance
but slow convergence.'''
if fname:
run = scipy.genfromtxt(fname)
else:
bestSol, run = tsp.TSP(200, 'MC3', 20000, 10, seed=None,
coordfile='tmp.txt')
fname = 'ExampleOutput/MC3-Long.txt'
run = scipy.array(run)
scipy.savetxt(fname, run)
# plotting
Xs = range(0, run.shape[0] * 1000, 1000)
pl.plot(Xs, run)
pl.show()
def LongSA(fname=None):
'''Plot a single long SA run to demonstrate performance under slower
cooling schedule.'''
if fname:
run = scipy.genfromtxt(fname)
else:
bestSol, run = tsp.TSP(200, 'SA', 20000, 'placeholder', seed=None,
coordfile='tmp.txt')
fname = 'ExampleOutput/SA-Long.txt'
run = scipy.array(run)
scipy.savetxt(fname, run)
# plotting
Xs = range(0, run.shape[0] * 1000, 1000)
pl.plot(Xs, run)
pl.show()
def plot_rectified(self):
import pylab
pylab.title('rectified')
pylab.imshow(self.rectified)
for line in self.vlines:
p0, p1 = line
p0 = self.inv_transform(p0)
p1 = self.inv_transform(p1)
pylab.plot((p0[0], p1[0]), (p0[1], p1[1]), c='green')
for line in self.hlines:
p0, p1 = line
p0 = self.inv_transform(p0)
p1 = self.inv_transform(p1)
pylab.plot((p0[0], p1[0]), (p0[1], p1[1]), c='red')
pylab.axis('image');
pylab.grid(c='yellow', lw=1)
pylab.plt.yticks(np.arange(0, self.l, 100.0));
pylab.xlim(0, self.w)
pylab.ylim(self.l, 0)
def plot_original(self):
import pylab
pylab.title('original')
pylab.imshow(self.data)
for line in self.lines:
p0, p1 = line
pylab.plot((p0[0], p1[0]), (p0[1], p1[1]), c='blue', alpha=0.3)
for line in self.vlines:
p0, p1 = line
pylab.plot((p0[0], p1[0]), (p0[1], p1[1]), c='green')
for line in self.hlines:
p0, p1 = line
pylab.plot((p0[0], p1[0]), (p0[1], p1[1]), c='red')
pylab.axis('image');
pylab.grid(c='yellow', lw=1)
pylab.plt.yticks(np.arange(0, self.l, 100.0));
pylab.xlim(0, self.w)
pylab.ylim(self.l, 0)
def _plot_background(self, bgimage):
import pylab as pl
# Show the portion of the image behind this facade
left, right = self.facade_left, self.facade_right
top, bottom = 0, self.mega_facade.rectified.shape[0]
if bgimage is not None:
pl.imshow(bgimage[top:bottom, left:right], extent=(left, right, bottom, top))
else:
# Fit the facade in the plot
y0, y1 = pl.ylim()
x0, x1 = pl.xlim()
x0 = min(x0, left)
x1 = max(x1, right)
y0 = min(y0, top)
y1 = max(y1, bottom)
pl.xlim(x0, x1)
pl.ylim(y1, y0)
def PlotProps(pars):
import numpy as np
import pylab as pl
import vanGenuchten as vg
psi = np.linspace(-10, 2, 200)
pl.figure
pl.subplot(3, 1, 1)
pl.plot(psi, vg.thetaFun(psi, pars))
pl.ylabel(r'$\theta(\psi) [-]$')
pl.subplot(3, 1, 2)
pl.plot(psi, vg.CFun(psi, pars))
pl.ylabel(r'$C(\psi) [1/m]$')
pl.subplot(3, 1, 3)
pl.plot(psi, vg.KFun(psi, pars))
pl.xlabel(r'$\psi [m]$')
pl.ylabel(r'$K(\psi) [m/d]$')
# pl.show()
def plot_evaluation_episode_reward():
pylab.clf()
sns.set_context("poster")
pylab.plot(0, 0)
episodes = [0]
average_scores = [0]
median_scores = [0]
for n in xrange(len(csv_evaluation)):
params = csv_evaluation[n]
episodes.append(params[0])
average_scores.append(params[1])
median_scores.append(params[2])
pylab.plot(episodes, average_scores, sns.xkcd_rgb["windows blue"], lw=2)
pylab.xlabel("episodes")
pylab.ylabel("average score")
pylab.savefig("%s/evaluation_episode_average_reward.png" % args.plot_dir)
pylab.clf()
pylab.plot(0, 0)
pylab.plot(episodes, median_scores, sns.xkcd_rgb["windows blue"], lw=2)
pylab.xlabel("episodes")
pylab.ylabel("median score")
pylab.savefig("%s/evaluation_episode_median_reward.png" % args.plot_dir)
def joint_density(X, Y, bounds=None):
"""
Plots joint distribution of variables.
Inherited from method in src/graphics.py module in project
git://github.com/aflaxman/pymc-example-tfr-hdi.git
"""
if bounds:
X_min, X_max, Y_min, Y_max = bounds
else:
X_min = X.min()
X_max = X.max()
Y_min = Y.min()
Y_max = Y.max()
pylab.plot(X, Y, linestyle='none', marker='o', color='green', mec='green', alpha=.2, zorder=-99)
gkde = scipy.stats.gaussian_kde([X, Y])
x,y = pylab.mgrid[X_min:X_max:(X_max-X_min)/25.,Y_min:Y_max:(Y_max-Y_min)/25.]
z = pylab.array(gkde.evaluate([x.flatten(), y.flatten()])).reshape(x.shape)
pylab.contour(x, y, z, linewidths=2)
pylab.axis([X_min, X_max, Y_min, Y_max])
def error_resampler(errors):
"""
For use with ``pandas``.
Method for performing the proper ``mean`` resampling of the *uncertainties* (error bars)
in the time series with ``pandas``. Note that doing a simple resampling
will fail to propagate uncertainties, since error in the mean goes as
.. math:: \sigma=\sqrt{\Sigma_n \sigma_n^2}
Example: Resamples the errors with 30 day averages:
::
# df['errflux'] has the 1sigma uncertainties
err=df['errflux'].resample('30d').apply(nmmn.dsp.error_resampler)
# plot y-values (df['flux']) with errors (err)
df['flux'].resample('30d').mean().plot(yerr=err)
"""
err=errors**2
return numpy.sqrt(err.sum())/err.size
def plot(self, filename):
r"""Save an image file of the transfer function.
This function loads up matplotlib, plots the transfer function and saves.
Parameters
----------
filename : string
The file to save out the plot as.
Examples
--------
>>> tf = TransferFunction( (-10.0, -5.0) )
>>> tf.add_gaussian(-9.0, 0.01, 1.0)
>>> tf.plot("sample.png")
"""
import matplotlib
matplotlib.use("Agg")
import pylab
pylab.clf()
pylab.plot(self.x, self.y, 'xk-')
pylab.xlim(*self.x_bounds)
pylab.ylim(0.0, 1.0)
pylab.savefig(filename)
def show(self):
r"""Display an image of the transfer function
This function loads up matplotlib and displays the current transfer function.
Parameters
----------
Examples
--------
>>> tf = TransferFunction( (-10.0, -5.0) )
>>> tf.add_gaussian(-9.0, 0.01, 1.0)
>>> tf.show()
"""
import pylab
pylab.clf()
pylab.plot(self.x, self.y, 'xk-')
pylab.xlim(*self.x_bounds)
pylab.ylim(0.0, 1.0)
pylab.draw()
def plotPopScore(population, fitness=False):
""" Plot the population score distribution
Example:
>>> Interaction.plotPopScore(population)
:param population: population object (:class:`GPopulation.GPopulation`)
:param fitness: if True, the fitness score will be used, otherwise, the raw.
:rtype: None
"""
score_list = getPopScores(population, fitness)
pylab.plot(score_list, 'o')
pylab.title("Plot of population score distribution")
pylab.xlabel('Individual')
pylab.ylabel('Score')
pylab.grid(True)
pylab.show()
# -----------------------------------------------------------------
def plotHistPopScore(population, fitness=False):
""" Population score distribution histogram
Example:
>>> Interaction.plotHistPopScore(population)
:param population: population object (:class:`GPopulation.GPopulation`)
:param fitness: if True, the fitness score will be used, otherwise, the raw.
:rtype: None
"""
score_list = getPopScores(population, fitness)
n, bins, patches = pylab.hist(score_list, 50, facecolor='green', alpha=0.75, normed=1)
pylab.plot(bins, pylab.normpdf(bins, numpy.mean(score_list), numpy.std(score_list)), 'r--')
pylab.xlabel('Score')
pylab.ylabel('Frequency')
pylab.grid(True)
pylab.title("Plot of population score distribution")
pylab.show()
# -----------------------------------------------------------------
def plotPopScore(population, fitness=False):
""" Plot the population score distribution
Example:
>>> Interaction.plotPopScore(population)
:param population: population object (:class:`GPopulation.GPopulation`)
:param fitness: if True, the fitness score will be used, otherwise, the raw.
:rtype: None
"""
score_list = getPopScores(population, fitness)
pylab.plot(score_list, 'o')
pylab.title("Plot of population score distribution")
pylab.xlabel('Individual')
pylab.ylabel('Score')
pylab.grid(True)
pylab.show()
# -----------------------------------------------------------------
def plotHistPopScore(population, fitness=False):
""" Population score distribution histogram
Example:
>>> Interaction.plotHistPopScore(population)
:param population: population object (:class:`GPopulation.GPopulation`)
:param fitness: if True, the fitness score will be used, otherwise, the raw.
:rtype: None
"""
score_list = getPopScores(population, fitness)
n, bins, patches = pylab.hist(score_list, 50, facecolor='green', alpha=0.75, normed=1)
pylab.plot(bins, pylab.normpdf(bins, numpy.mean(score_list), numpy.std(score_list)), 'r--')
pylab.xlabel('Score')
pylab.ylabel('Frequency')
pylab.grid(True)
pylab.title("Plot of population score distribution")
pylab.show()
# -----------------------------------------------------------------
def fastLapModel(xList, labels, names, multiple=0, full_set=0):
X = numpy.array(xList)
y = numpy.array(labels)
featureNames = []
featureNames = numpy.array(names)
# take fixed holdout set 30% of data rows
xTrain, xTest, yTrain, yTest = train_test_split(
X, y, test_size=0.30, random_state=531)
# for final model (no CV)
if full_set:
xTrain = X
yTrain = y
check_set(xTrain, xTest, yTrain, yTest)
print "Fitting the model to the data set..."
# train random forest at a range of ensemble sizes in order to see how the
# mse changes
mseOos = []
m = 10 ** multiple
nTreeList = range(500 * m, 1000 * m, 100 * m)
# iTrees = 10000
for iTrees in nTreeList:
depth = None
maxFeat = int(np.sqrt(np.shape(xTrain)[1])) + 1 # try tweaking
RFmd = ensemble.RandomForestRegressor(n_estimators=iTrees, max_depth=depth, max_features=maxFeat,
oob_score=False, random_state=531, n_jobs=-1)
# RFmd.n_features = 5
RFmd.fit(xTrain, yTrain)
# Accumulate mse on test set
prediction = RFmd.predict(xTest)
mseOos.append(mean_squared_error(yTest, prediction))
# plot training and test errors vs number of trees in ensemble
plot.plot(nTreeList, mseOos)
plot.xlabel('Number of Trees in Ensemble')
plot.ylabel('Mean Squared Error')
#plot.ylim([0.0, 1.1*max(mseOob)])
plot.show()
print("MSE")
print(mseOos[-1])
return xTrain, xTest, yTrain, yTest, RFmd
def plot_importance(names, model, savefig=True):
featureNames = numpy.array(names)
featureImportance = model.feature_importances_
featureImportance = featureImportance / featureImportance.max()
sorted_idx = numpy.argsort(featureImportance)
barPos = numpy.arange(sorted_idx.shape[0]) + .5
plot.barh(barPos, featureImportance[sorted_idx], align='center')
plot.yticks(barPos, featureNames[sorted_idx])
plot.xlabel('Variable Importance')
plot.subplots_adjust(left=0.2, right=0.9, top=0.9, bottom=0.1)
if savefig:
dt_ = datetime.datetime.now().strftime('%d%b%y_%H%M')
plt.savefig("../graphs/featureImportance_" + dt_ + ".png")
plot.show()
# Plot prediction save the graph with a timestamp
def plot_pred(y_predicted, y, savefig=True):
# y_predicted.reset_index(drop=1, inplace=1)
index = np.argsort(y)
y = y[index]
# y.shape
yhat = y_predicted[index]
yy = pd.DataFrame([y, yhat])
if yy.shape[1] > yy.shape[0]:
yy = yy.T
yy.reset_index(drop=0, inplace=1)
plt.scatter(yy.index, yy[1], s=.4)
plt.plot(yy.index, yy[0], ls='-', color='red', linewidth=.5)
if savefig:
dt_ = datetime.datetime.now().strftime('%d%b%y_%H%M')
plt.savefig("../graphs/" + dt_ + ".png")
plt.show()
# Check the data before regression (no Na, size, etc)
def backgroundPeakValue(img, bins=500):
f = FitHistogramPeaks(img, bins=bins, bins2=300)
bgp = getBackgroundPeak(f.fitParams)
ind = int(bgp[1])
if ind < 0:
ind = 0
# y = f.yvals[ind:]
# i = np.argmax(np.diff(y) > 0)
# bgmaxpos = ind # + i
# print(f.xvals[bgmaxpos], bgmaxpos)
# import pylab as plt
# plt.plot(f.xvals, f.yvals)
# plt.show()
return f.xvals[ind]