def linbleeched_F0(data):
"""
Calculate a linear fit (:math:`y(t) = m t + y_0`) for each pixel, which is assumed to correct for bleaching effects.
:param data: the video data of shape (M, N, T).
:return: tuple (m, y0) of two images, each of shape (M, N).
"""
# generate x (time) coordinates for the fit
x = numpy.arange(data.shape[-1])
# reshape the data to a 2-d array: first dimension is pixel index, second dimension is time
d = numpy.reshape(data, (data.shape[0] * data.shape[1], data.shape[-1]))
# find fit parameters
m, y0 = numpy.polyfit(x, d.T, 1)
# reshape fit parameters back to image shape
return m.reshape(data.shape[0:2]), y0.reshape(data.shape[0:2])
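# A minimal usage sketch for the function above (names below are illustrative): build a
# synthetic (M, N, T) stack with an exactly linear bleaching trend per pixel and check
# that the per-pixel slope and offset are recovered.
import numpy

M, N, T = 4, 5, 50
t = numpy.arange(T)
y0_true = numpy.random.uniform(50, 100, size=(M, N))    # per-pixel baseline
m_true = numpy.random.uniform(-0.5, -0.1, size=(M, N))  # per-pixel bleaching slope
data = y0_true[..., None] + m_true[..., None] * t       # shape (M, N, T)

m, y0 = linbleeched_F0(data)
print(numpy.allclose(m, m_true), numpy.allclose(y0, y0_true))  # True True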
Python polyfit() example source code
def RotatePoints(self, x_in, y_in):
#Rotational matrix
angle = np.pi/1.065
rot = np.array([[np.cos(angle),-np.sin(angle)],[np.sin(angle),np.cos(angle)]])
#Rotate a reference point and fit a line between the original and rotated points to get the skew slope
x = 0.
y = np.log(1000.)*-self.rd
d1 = np.array([[x],[y]])
V1 = np.dot(rot,d1)
B1 = np.linspace(d1[0],V1[0],100)
B2 = np.linspace(d1[1],V1[1],100)
p = np.polyfit(B1,B2,1)
#Skew-T y-axis
y_out = -self.rd*np.log(y_in)
#Skew-T x-axis
B = (x_in + (-y/p[0]))*p[0]
x_out = (y_out+B)/p[0]
return x_out, y_out
#Process winds for the hodograph
def build_batch_lstsq_estimates(self, asset_returns, benchmark_returns):
if len(asset_returns) != len(benchmark_returns):
raise ValueError('asset and benchmark return series must have the same length')
# Rolling least-squares fit of asset returns against benchmark returns
beta = np.zeros(len(asset_returns))
alpha = np.zeros(len(asset_returns))
for enum_i, elem in enumerate(asset_returns):
lookback = min(self.lookback, enum_i)
# print '==> ', enum_i, len(asset_returns), len(beta)
beta[enum_i], alpha[enum_i] = np.polyfit(benchmark_returns[enum_i - lookback:enum_i + 1],
asset_returns[enum_i - lookback:enum_i + 1], 1)
# don't wanna do a line fit for less than 3 points, really
beta[0], alpha[0] = 0, 0
beta[1], alpha[1] = 0, 0
self.alpha_betas_lstsq = np.array(list(zip(alpha, beta)))  # list() so np.array builds an (N, 2) array in Python 3
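# Standalone sketch of the same rolling least-squares beta/alpha estimate (the class
# attributes are replaced by explicit arguments here; names are illustrative).
import numpy as np

def rolling_beta_alpha(asset_returns, benchmark_returns, lookback=20):
    n = len(asset_returns)
    beta, alpha = np.zeros(n), np.zeros(n)
    for i in range(2, n):  # skip the first two points: a line fit needs at least 3
        lb = min(lookback, i)
        beta[i], alpha[i] = np.polyfit(benchmark_returns[i - lb:i + 1],
                                       asset_returns[i - lb:i + 1], 1)
    return beta, alpha

rng = np.random.default_rng(0)
bench = rng.normal(0.0, 0.01, 500)
asset = 1.5 * bench + rng.normal(0.0, 0.005, 500)   # true beta is 1.5
beta, alpha = rolling_beta_alpha(asset, bench)
print(beta[-1])  # hovers around 1.5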
def early_stop_by_valid(valid_auc, k):
'''Early stop based on the slope of recent validation AUC.
'''
if k > len(valid_auc):
return False
print(k, valid_auc)
x = range(k)
y = valid_auc[-k:]
# print x, y
slope, bias = np.polyfit(x, y, 1)
print("slope: {0:f}".format(slope))
stopflag = False
if slope <= FLAGS.slope:
stopflag = True
return stopflag
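# Self-contained sketch of the same slope-based early stopping (FLAGS.slope is replaced
# by an explicit threshold; names here are illustrative).
import numpy as np

def should_stop(valid_auc, k=5, slope_threshold=1e-4):
    if k > len(valid_auc):
        return False
    slope, _ = np.polyfit(range(k), valid_auc[-k:], 1)
    return slope <= slope_threshold  # stop when validation AUC is no longer improving

history = [0.70, 0.75, 0.78, 0.781, 0.780, 0.779, 0.778, 0.777]
print(should_stop(history))  # True: the last 5 epochs trend slightly downward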
def test_polyfit_build(self):
# Ticket #628
ref = [-1.06123820e-06, 5.70886914e-04, -1.13822012e-01,
9.95368241e+00, -3.14526520e+02]
x = [90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103,
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 129,
130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141,
146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
170, 171, 172, 173, 174, 175, 176]
y = [9.0, 3.0, 7.0, 4.0, 4.0, 8.0, 6.0, 11.0, 9.0, 8.0, 11.0, 5.0,
6.0, 5.0, 9.0, 8.0, 6.0, 10.0, 6.0, 10.0, 7.0, 6.0, 6.0, 6.0,
13.0, 4.0, 9.0, 11.0, 4.0, 5.0, 8.0, 5.0, 7.0, 7.0, 6.0, 12.0,
7.0, 7.0, 9.0, 4.0, 12.0, 6.0, 6.0, 4.0, 3.0, 9.0, 8.0, 8.0,
6.0, 7.0, 9.0, 10.0, 6.0, 8.0, 4.0, 7.0, 7.0, 10.0, 8.0, 8.0,
6.0, 3.0, 8.0, 4.0, 5.0, 7.0, 8.0, 6.0, 6.0, 4.0, 12.0, 9.0,
8.0, 8.0, 8.0, 6.0, 7.0, 4.0, 4.0, 5.0, 7.0]
tested = np.polyfit(x, y, 4)
assert_array_almost_equal(ref, tested)
def plot_scatter_charts(data, file_name):
scatters = []
for lang, values in data.items():
s = figure(width=300, plot_height=300, title=lang)
s.yaxis.formatter = NumeralTickFormatter(format="0.0a")
s.circle(values[0], values[1], size=10, color="navy", alpha=0.5)
x = np.linspace(1, 100, 10)
# noinspection PyTupleAssignmentBalance
m, b = np.polyfit(values[0], values[1], 1)
y = m * x + b
corr_coef = round(pearsonr(values[0], values[1])[0], 1)
s.line(x, y, legend=f'PCC = {corr_coef}')
scatters.append(s)
split_scatters = split(scatters, 3)
p = gridplot(split_scatters)
output_file(file_name)
show(p)
def defineRegionOfInterest(img, left_bottom = [0, 539], right_bottom = [900, 539], apex = [475, 320]):
xsize = img.shape[1]
ysize = img.shape[0]
# Perform a linear fit (y=Ax+B) to each of the three sides of the triangle
# np.polyfit returns the coefficients [A, B] of the fit
fit_left = np.polyfit((left_bottom[0], apex[0]), (left_bottom[1], apex[1]), 1)
fit_right = np.polyfit((right_bottom[0], apex[0]), (right_bottom[1], apex[1]), 1)
fit_bottom = np.polyfit((left_bottom[0], right_bottom[0]), (left_bottom[1], right_bottom[1]), 1)
# Find the region inside the lines
XX, YY = np.meshgrid(np.arange(0, xsize), np.arange(0, ysize))
region_thresholds = (YY > (XX*fit_left[0] + fit_left[1])) & \
(YY > (XX*fit_right[0] + fit_right[1])) & \
(YY < (XX*fit_bottom[0] + fit_bottom[1]))
return region_thresholds
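# Usage sketch, assuming a 540x960 frame as the default corner points suggest:
# the returned boolean mask selects the triangular region of interest.
import numpy as np

img = np.full((540, 960, 3), 255, dtype=np.uint8)   # dummy white frame
mask = defineRegionOfInterest(img)                   # boolean array, shape (540, 960)
masked = img.copy()
masked[~mask] = 0                                    # black out everything outside the triangle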
def testEncodeDecodeShift(self):
x = np.linspace(-1, 1, 1000).astype(np.float32)
with self.test_session() as sess:
encoded = mu_law_encode(x, QUANT_LEVELS)
decoded = mu_law_decode(encoded, QUANT_LEVELS)
roundtripped = sess.run(decoded)
# Detect non-unity scaling and non-zero shift in the roundtripped
# signal by asserting that slope = 1 and y-intercept = 0 of line fit to
# roundtripped vs x values.
coeffs = np.polyfit(x, roundtripped, 1)
slope = coeffs[0]
y_intercept = coeffs[1]
EPSILON = 1e-4
self.assertNear(slope, 1.0, EPSILON)
self.assertNear(y_intercept, 0.0, EPSILON)
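# The assertion above boils down to a degree-1 polyfit; in plain NumPy, a small residual
# scaling or shift in a roundtripped signal shows up directly in the fitted coefficients.
import numpy as np

x = np.linspace(-1, 1, 1000)
roundtripped = 1.02 * x + 0.01            # pretend roundtrip with 2% gain and a DC shift
slope, y_intercept = np.polyfit(x, roundtripped, 1)
print(abs(slope - 1.0) < 1e-4, abs(y_intercept) < 1e-4)  # False False: drift detected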
def hurst(ts):
# Create the range of lag values
window_len = 30
hurst_ts = np.zeros((0,), dtype=float)  # accumulates the rolling Hurst estimates
for tail in range(window_len, len(ts)):
lags = range(1, window_len // 2)
# print(lags)
# Calculate the array of the variances of the lagged differences
cur_ts = ts[max(1, tail - window_len):tail]
# cur_ts = ts[:tail]
tau = [sqrt(std(subtract(cur_ts[lag:], cur_ts[:-lag]))) for lag in lags]
# print("Time series slice len: ",len(cur_ts),len(tau),lags)
# Use a linear fit to estimate the Hurst Exponent
poly = polyfit(log(lags), log(tau), 1)
# Return the Hurst exponent from the polyfit output
# print(poly[0]*2.0)
hurst_ts = np.append(hurst_ts, poly[0] * 2.0)
return hurst_ts
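# Quick sanity check (the snippet assumes sqrt, std, subtract, log and polyfit are
# imported from numpy at module level): a Gaussian random walk should give a rolling
# Hurst estimate fluctuating around 0.5.
import numpy as np
from numpy import sqrt, std, subtract, log, polyfit  # names used inside hurst()

rng = np.random.default_rng(0)
walk = np.cumsum(rng.normal(size=2000))
h = hurst(walk)
print(h.mean())  # typically in the neighbourhood of 0.5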
def fit_dimensions(self, data, fit_TTmax = True):
"""
If fit_TTmax is True, use the blade length profile to adjust dHS_max.
"""
if (fit_TTmax and 'L_blade' in data.columns):
dat = data.dropna(subset=['L_blade'])
xn = self.xn(dat['rank'])
fit = numpy.polyfit(xn,dat['L_blade'],7)
x = numpy.linspace(min(xn), max(xn), 500)
y = numpy.polyval(fit,x)
self.xnmax = x[numpy.argmax(y)]
self.TTmax = self.TTxn(self.xnmax)
self.scale = {k: numpy.mean(data[k] / self.predict(k,data['rank'], data['nff'])) for k in data.columns if k in self.ref.columns}
return self.scale
def horner(circulation_time, times, temp):
'''
horner(circulation_time, times, temp)
*Input parameters:
- circulation_time - hours from last circulation;
- times - total time since circulation stopped at 1st Run, 2nd Run and so on ...
- temp - a list of temperatures corresponding to 1st Run, 2nd Run and so on ...
*Returns:
- horner_temp - formation temperature estimated by Horner method (thermometer readings
from different runs)
*Example of usage:
horner(6, (7.0,11.5,19.5), (100,105,108))
where:
circulation_time = 6 # time since circulation stopped (hours)
times = (7.0,11.5,19.5) # total time since circulation stopped at 1st, 2nd, 3rd RUN (hours)
temp=(100,105,108) # temperatures recorded at 1st, 2nd, 3rd RUN (Celsius degrees)
'''
horner_time = np.array(times) / (circulation_time + np.array(times))
slope,intercept = np.polyfit (np.log(horner_time), temp, 1)
# the Horner time ratio tends to 1 at infinite shut-in time, so log(1) = 0 and the intercept gives the formation temperature
horner_temp = round(slope * np.log(1) + intercept, 2)
return horner_temp
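# Running the docstring example; the Horner plot extrapolates to infinite shut-in time,
# so the estimate ends up a few degrees above the last recorded reading
# (a quick hand calculation gives roughly 114 degC here).
import numpy as np

print(horner(6, (7.0, 11.5, 19.5), (100, 105, 108)))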
def vander(points, deg):
"""N-dim Vandermonde matrix for data `points` and a polynomial of degree
`deg`.
Parameters
----------
points : see polyfit()
deg : int
Degree of the poly (e.g. 3 for cubic).
Returns
-------
vander : 2d array (npoints, (deg+1)**ndim)
"""
powers = poly_powers(points.shape[1], deg)
# low memory version, slower
##npoints = points.shape[0]
##vand = np.empty((npoints, (deg+1)**ndim), dtype=float)
##for ipoint in range(npoints):
## vand[ipoint,:] = (points[ipoint]**powers).prod(axis=1)
tmp = (points[...,None] ** np.swapaxes(powers, 0, 1)[None,...])
return tmp.prod(axis=1)
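# poly_powers() is not shown above; the (deg+1)**ndim column count implies it enumerates
# every combination of exponents 0..deg per dimension. A plausible stand-in (an assumption,
# not the original helper) and a shape check:
import itertools
import numpy as np

def poly_powers(ndim, deg):
    return np.array(list(itertools.product(range(deg + 1), repeat=ndim)))

points = np.random.rand(5, 2)     # 5 points in 2 dimensions
V = vander(points, 2)             # quadratic: 9 monomials x^i * y^j with i, j in {0, 1, 2}
print(V.shape)                    # (5, 9)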
multistock.py (project: Machine-Learning-for-Stock-Trading, author: wilsonsk)
def build_scatter_plot(dframe):
#build scatter plot: stock1 vs. stock2
daily_ret = get_daily_returns(dframe)
symbols = []
for symbol in dframe[0:]:
symbols.append(symbol)
daily_ret.plot(kind='scatter', x=symbols[0], y=symbols[1])
#use polyfit(), which needs x-coords and y-coords, to fit a line -- the y-coords are the daily return values
#polyfit() returns the slope (beta) first and the y-intercept (alpha) second -- in y = mx + b, m is the slope and b is the intercept
#to plot the fitted line, evaluate y = mx + b for every value of x
beta_stock1, alpha_stock1 = np.polyfit(daily_ret[symbols[1]], daily_ret[symbols[0]], 1)
beta_stock2, alpha_stock2 = np.polyfit(daily_ret[symbols[0]], daily_ret[symbols[1]], 1)
print "beta of ", symbols[0], "= ", beta_stock1
print "alpha of ", symbols[0], "= ", alpha_stock1
print "beta of ", symbols[1], "= ", beta_stock2
print "alpha of ", symbols[1], "= ", alpha_stock2
plt.plot(daily_ret[symbols[0]], beta_stock2 * daily_ret[symbols[0]] + alpha_stock2, '-', color='r')
plt.show()
def fourierExtrapolation(x, n_predict):
n = len(x)
n_harm = 10 # number of harmonics in model
t = np.arange(0, n)
p = np.polyfit(t, x, 1) # find linear trend in x
x_notrend = x - p[0] * t # detrended x
x_freqdom = fft.fft(x_notrend) # detrended x in frequency domain
f = fft.fftfreq(n) # frequencies
indexes = list(range(n))
# sort indexes by frequency, lower -> higher
indexes.sort(key = lambda i: np.absolute(f[i]))
t = np.arange(0, n + n_predict)
restored_sig = np.zeros(t.size)
for i in indexes[:1 + n_harm * 2]:
ampli = np.absolute(x_freqdom[i]) / n # amplitude
phase = np.angle(x_freqdom[i]) # phase
restored_sig += ampli * np.cos(2 * np.pi * f[i] * t + phase)
return restored_sig + p[0] * t
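# Usage sketch; the snippet relies on numpy's fft module being available under the
# name `fft`, e.g. via `from numpy import fft`.
import numpy as np
from numpy import fft

t = np.arange(200)
signal = 0.05 * t + np.sin(2 * np.pi * t / 25)    # linear trend plus seasonality
extended = fourierExtrapolation(signal, 50)
print(extended.shape)  # (250,) -- the original 200 samples plus 50 extrapolated ones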
def __call__(self, words, weights, vocabulary_max):
if len(words) < vocabulary_max * self.trigger_ratio:
return words, weights
if not isinstance(words, numpy.ndarray):
words = numpy.array(words)
# Tail optimization does not help with very large vocabularies
if len(words) > vocabulary_max * 2:
indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
indices = indices[-vocabulary_max:]
words = words[indices]
weights = weights[indices]
return words, weights
# Vocabulary typically consists of these three parts:
# 1) the core - we found its border - `core_end` - 15%
# 2) the body - 70%
# 3) the minor tail - 15%
# (1) and (3) are roughly the same size
# (3) can be safely discarded, (2) can be discarded with care,
# (1) shall never be discarded.
sorter = numpy.argsort(weights)[::-1]
weights = weights[sorter]
trend_start = int(len(weights) * 0.2)
trend_finish = int(len(weights) * 0.8)
z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
numpy.log(weights[trend_start:trend_finish]),
1)
exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
avg_error = numpy.abs(weights[trend_start:trend_finish] -
exp_z[trend_start:trend_finish]).mean()
tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
weights = weights[:-tail_size][:vocabulary_max]
words = words[sorter[:-tail_size]][:vocabulary_max]
return words, weights
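# The key step above is fitting a straight line to the log of the sorted weights (an
# exponential trend) over the middle of the distribution; a stripped-down illustration:
import numpy as np

rng = np.random.default_rng(0)
weights = np.sort(rng.zipf(1.5, 10000).astype(float))[::-1]    # heavy-tailed word counts
start, finish = int(len(weights) * 0.2), int(len(weights) * 0.8)
z = np.polyfit(np.arange(start, finish), np.log(weights[start:finish]), 1)
trend = np.exp(z[1] + z[0] * np.arange(len(weights)))           # fitted exponential trend
print(z[0] < 0)  # True: the fitted log-linear trend over the body is decreasing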
def fit(self, X):
_X = self.__aggregate_dataset(X)
self.polynomial = np.polyfit(_X['expenses'].astype(np.int64),
_X['distance_traveled'].astype(np.int64),
3)
self._polynomial_fn = np.poly1d(self.polynomial)
return self
def minScalErr(stec,el,z,thisBias):
intel=np.asarray(el[stec.index],int)
sTEC=np.asarray(stec,float)
zmap = z[stec.index]
c=np.array([(i,np.average((sTEC[intel==i]-thisBias)
/zmap[intel==i])) for i in np.unique(intel) if i>30])
return np.polyfit(c[:,0],c[:,1],1)[0]
def trendline(xd, yd, order=1, c='r', alpha=1, plot_r=False, text_pos=None):
"""Make a line of best fit"""
#Calculate trendline
coeffs = np.polyfit(xd, yd, order)
intercept = coeffs[-1]
slope = coeffs[-2]
if order == 2: power = coeffs[0]
else: power = 0
minxd = np.min(xd)
maxxd = np.max(xd)
xl = np.array([minxd, maxxd])
yl = power * xl ** 2 + slope * xl + intercept
#Plot trendline
plt.plot(xl, yl, color=c, alpha=alpha)
#Calculate the Pearson correlation coefficient
r = sp.stats.pearsonr(xd, yd)[0]
if not plot_r:
#Annotate the plot with the R value
if text_pos is None:
text_pos = (0.9 * maxxd + 0.1 * minxd, 0.9 * np.max(yd) + 0.1 * np.min(yd),)
plt.text(text_pos[0], text_pos[1], '$R = %0.2f$' % r)
else:
#Return the R value:
return r
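# Usage sketch; trendline() assumes matplotlib.pyplot as plt and scipy as sp (with
# scipy.stats imported) at module level.
import numpy as np
import scipy as sp
import scipy.stats
import matplotlib.pyplot as plt

rng = np.random.default_rng(1)
xd = np.linspace(0, 10, 50)
yd = 2.0 * xd + rng.normal(0, 1.0, 50)
r = trendline(xd, yd, plot_r=True)   # with plot_r=True the Pearson r is returned
print(r)                             # close to 1 for this nearly linear data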
def fit_line(p1, p2):
# fit a line ax + by + c = 0 through two points, where p1 holds the x-coordinates and p2 the y-coordinates
if p1[0] == p1[1]:
# both x-coordinates are equal, so the line is vertical: x = p1[0]
return [1., 0., -p1[0]]
else:
[k, b] = np.polyfit(p1, p2, deg=1)
return [k, -1., b]
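# Usage sketch, reading p1 as the x-coordinates and p2 as the y-coordinates of two points.
import numpy as np

print(fit_line([0., 2.], [0., 4.]))   # line y = 2x  ->  approximately [2., -1., 0.]
print(fit_line([3., 3.], [1., 5.]))   # vertical line x = 3  ->  [1., 0., -3.]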