def _dump_model(model, subdir, filename):
    """Pickle ``model`` to ``<dname>/models/<subdir>/<filename>`` and close the file."""
    with open(join(dname + '/models/' + subdir + '/', filename), 'wb') as handle:
        pickle.dump(model, handle)


def train():
    """Train and pickle SVR, linear-regression and MLP models for every stock file.

    Changes the working directory to ``dname`` and, for each file name in
    ``onlyfiles``, reads ``data_files/<file>``, derives the ``HL_PCT`` and
    ``PCT_change`` features, and labels each row with the ``Adj. Close``
    value ``forecast_out`` rows later (``forecast_out`` is 1% of the row
    count, rounded up). Each estimator is pickled once before fitting
    (``*_unfit`` directories) and once after fitting (``*_fit`` directories).
    """
    os.chdir(dname)
    for selected_stock in onlyfiles:
        df = pd.read_csv(os.path.join('data_files', selected_stock))

        # Preprocessing: keep only the adjusted price/volume columns.
        # .copy() so the column assignments below act on an independent frame.
        df = df[['Adj. Open', 'Adj. High', 'Adj. Low', 'Adj. Close', 'Adj. Volume']].copy()
        # Measure of volatility: high-low spread relative to the low.
        df['HL_PCT'] = (df['Adj. High'] - df['Adj. Low']) / df['Adj. Low'] * 100.0
        df['PCT_change'] = (df['Adj. Close'] - df['Adj. Open']) / df['Adj. Open'] * 100.0
        df = df[['Adj. Close', 'HL_PCT', 'PCT_change', 'Adj. Volume']].copy()

        forecast_col = 'Adj. Close'
        df.fillna(value=-99999, inplace=True)
        forecast_out = int(math.ceil(0.01 * len(df)))
        df['label'] = df[forecast_col].shift(-forecast_out)

        # Scale all feature rows together, then drop the trailing rows whose
        # label is NaN because of the shift above.
        X = np.array(df.drop(['label'], axis=1))
        X = preprocessing.scale(X)
        X = X[:-forecast_out]
        df.dropna(inplace=True)
        y = np.array(df['label'])

        X_train, _, y_train, _ = cross_validation.train_test_split(X, y, test_size=0.2)

        svr = SVR()
        _dump_model(svr, 'svr_unfit', selected_stock + 'svr.sav')
        svr.fit(X_train, y_train)

        lr = LinearRegression()
        _dump_model(lr, 'lr_unfit', selected_stock + 'lr.sav')
        lr.fit(X_train, y_train)

        mlp = MLPRegressor()
        _dump_model(mlp, 'mlp_unfit', selected_stock + 'mlp.sav')
        mlp.fit(X_train, y_train)

        # The fitted models are written only after all three fits succeeded.
        _dump_model(svr, 'svr_fit', selected_stock + 'svr.sav')
        _dump_model(lr, 'lr_fit', selected_stock + 'lr.sav')
        _dump_model(mlp, 'mlp_fit', selected_stock + 'mlp.sav')
        print(selected_stock + " - trained")
python类SVR的实例源码
def train(self):
    """Fit an SVR on the training CSV and save the pickled model.

    The training data comes from
    ``self._get_data('training-2016-12-01-2017-02-28.csv')`` and the fitted
    model is written to ``finish_pos.model`` in the current working directory.
    """
    clf = SVR(C=1.0, epsilon=0.1, cache_size=1000)
    X, y = self._get_data('training-2016-12-01-2017-02-28.csv')

    # Fit the model
    clf.fit(X, y)

    # Serialize before opening the file so a pickling failure cannot leave a
    # truncated model file behind.
    payload = pickle.dumps(clf)

    # The context manager closes the file even if the write raises.
    with open('finish_pos.model', 'wb') as model_file:
        model_file.write(payload)
def learn(x, y, c=1e3, gamma=0.1):
    """Fit an RBF-kernel SVR on ``(x, y)`` and return the result of ``fit``.

    ``c`` is passed to SVR as ``C`` and ``gamma`` as ``gamma``.
    """
    return SVR(kernel='rbf', C=c, gamma=gamma).fit(x, y)
stock_price_predictor.py 文件源码
项目:stock_price_prediction
作者: inaciomdrs
项目源码
文件源码
阅读 15
收藏 0
点赞 0
评论 0
def svr_lin():
    """Return a new, unfitted SVR with a linear kernel and ``C=1e3``."""
    regressor = SVR(kernel='linear', C=1e3)
    return regressor
stock_price_predictor.py 文件源码
项目:stock_price_prediction
作者: inaciomdrs
项目源码
文件源码
阅读 16
收藏 0
点赞 0
评论 0
def svr_poly():
    """Return a new, unfitted SVR with a degree-2 polynomial kernel and ``C=1e3``."""
    regressor = SVR(kernel='poly', C=1e3, degree=2)
    return regressor
stock_price_predictor.py 文件源码
项目:stock_price_prediction
作者: inaciomdrs
项目源码
文件源码
阅读 19
收藏 0
点赞 0
评论 0
def svr_rbf():
    """Return a new, unfitted SVR with an RBF kernel, ``C=1e3`` and ``gamma=0.1``."""
    regressor = SVR(kernel='rbf', C=1e3, gamma=0.1)
    return regressor
def svr_grid():
    """Return an unfitted GridSearchCV over ``C`` and ``gamma`` for an RBF SVR.

    Verbosity and parallelism come from the module-level ``VERBOSE`` and
    ``THREADS`` values.
    """
    search_space = {'C': [1e-2, 1, 1e2], 'gamma': [1e-1, 1, 1e1]}
    return GridSearchCV(
        SVR(kernel='rbf'),
        verbose=VERBOSE,
        n_jobs=THREADS,
        param_grid=search_space,
    )
# Perform an experiment for a single model type.
def __init__(self):
    """Set the model's display names, indicator sample counts and parameter grid."""
    self.name = "Support Vector"
    self.summary_name = "SVR"

    # Number of samples per indicator: reduced set and full set.
    self.indicators_samples = {'Daily': 42}
    self.full_indicators_samples = {
        'Daily': 42,
        'Volume': 10,
        'Open': 10,
        'High': 10,
        'Low': 10,
        'SMA': 5,
        'EWMA': 5,
        'MOM': 5,
        'STD': 5,
    }

    # Candidate values for each model parameter.
    self.model_params = {
        'kernel': ['poly', 'rbf'],
        'C': [1e-2, 0.1, 1, 10],
        'tolerance': [.001, 0.1],
        'full_indicators': [True, False],
        'sample_presentation': [SamplePresentation.cumulative],
    }

    # Slot for saving a pretrained model for future use.
    self.pretrained_model = None
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    """Fit LinearRegression and ElasticNet and print residual diagnostics for each.

    For every model this fits on ``(TrainX, TrainY)``, predicts ``TestX`` and
    then:

    * prints the mean squared residual (under the original
      "Residual sum of squares" label),
    * shows a residuals-vs-predicted scatter plot,
    * prints ``model.score(TestX, TestY)``,
    * prints the p-values of the Jarque-Bera, Breusch-Pagan and Ljung-Box
      (10 lags) tests on the test residuals.

    Nothing is returned; all output goes to stdout and matplotlib.
    """
    # Imported once, up front, instead of on every loop iteration.
    from statsmodels.stats.stattools import jarque_bera
    import statsmodels.api as sms
    import statsmodels.stats.diagnostic as smd

    # Prefer the correctly spelled name and fall back to the legacy
    # misspelling used by the original code for older statsmodels releases.
    het_breuschpagan = getattr(smd, 'het_breuschpagan', None) or smd.het_breushpagan

    for model_name in [LinearRegression, ElasticNet]:
        model = model_name()
        model.fit(TrainX, TrainY)
        print(model_name)

        predicted = model.predict(TestX)
        resid = predicted - TestY
        # The label says "sum of squares" but the value is the mean of the
        # squared residuals; the text is kept for output compatibility.
        print("Residual sum of squares: %f" % np.mean(resid ** 2))

        plt.scatter(predicted, resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        plt.show()

        # Explained variance score: 1 is perfect prediction
        print('Variance score: %.2f' % model.score(TestX, TestY))

        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)

        # NOTE(review): indexing with [1] assumes acorr_ljungbox returns a
        # (statistic, p-value) pair -- confirm for the installed statsmodels.
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def train_bayesian_ridge():
    """Return ``(ModelProperties(regression=True), BayesianRidge())``.

    The estimator is returned unfitted.
    """
    properties = mp.ModelProperties(regression=True)
    estimator = linear_model.BayesianRidge()
    return properties, estimator
# http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
def setUpClass(self):
    """
    Prepare the tests: load the ``load_boston()`` dataset and fit a
    linear-kernel SVR on it. Does nothing when ``HAS_SKLEARN`` is false.
    """
    if HAS_SKLEARN:
        dataset = load_boston()
        fitted = SVR(kernel='linear')
        fitted.fit(dataset['data'], dataset['target'])

        # Keep the data and the model for the test methods.
        self.scikit_data = dataset
        self.scikit_model = fitted
def test_conversion_bad_inputs(self):
    """Check that the converter raises TypeError for two kinds of bad input."""
    # An SVR that was never fitted must be rejected.
    with self.assertRaises(TypeError):
        sklearn_converter.convert(SVR(), 'data', 'out')

    # A model of an unexpected class must be rejected as well.
    with self.assertRaises(TypeError):
        sklearn_converter.convert(OneHotEncoder(), 'data', 'out')
def training(self, c, g):
    """Create an RBF SVR with ``C=c`` and ``gamma=g``, store it and fit it.

    The model is stored on ``self.model`` and fitted on
    ``self.train_date`` / ``self.train_price``.
    """
    regressor = SVR(kernel='rbf', C=c, gamma=g)
    self.model = regressor
    # Fit the stored model to the training points.
    regressor.fit(self.train_date, self.train_price)
def draw(self):
    """Plot the raw prices as a scatter and the model's predictions as a line."""
    plt.scatter(self.dates, self.prices, color='black', label='Data')

    fitted_curve = self.model.predict(self.dates)
    plt.plot(self.dates, fitted_curve, color='red', label='RBF model')

    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('SVR test for SPY trimmed data (2014, 2015)')
    plt.show()
time_predict.py 文件源码
项目:KDD-2017-Travel-Time-Prediction
作者: InfiniteWing
项目源码
文件源码
阅读 15
收藏 0
点赞 0
评论 0
def getWeatherPredict(predict, nowww):
    """Scale ``predict`` by the square root of a factor chosen from ``nowww``.

    The factor is picked by the first matching band: ``nowww >= 2``,
    ``nowww >= 1.0``, ``nowww > 0``, otherwise the fallback of 0.98.
    """
    if nowww >= 2:
        factor = 1.110727412879317
    elif nowww >= 1.0:
        factor = 1.0960104809326925
    elif nowww > 0:
        factor = 1.0730721851729204
    else:
        factor = 0.98
    return predict * math.sqrt(factor)
# SVR model (the rest of the original comment was lost to a character-encoding error)
time_predict.py 文件源码
项目:KDD-2017-Travel-Time-Prediction
作者: InfiniteWing
项目源码
文件源码
阅读 17
收藏 0
点赞 0
评论 0
def SVR_Model(fw, train_lines, test_train_lines, test_lines, mn_time):
    """Fit an SVR on ``train_lines``, predict ``test_lines`` and report each result.

    Every line is parsed with ``parsePoint`` into ``(label, feature)``. Each
    prediction is multiplied by a weighting derived from ``getTime`` of its
    test feature relative to ``getAvgTime`` of the training features, then
    passed to ``printResult`` with the true label. ``test_train_lines`` is
    accepted but not used.
    """
    train_pairs = [parsePoint(row) for row in train_lines]
    test_pairs = [parsePoint(row) for row in test_lines]

    labels_train = [lbl for lbl, _ in train_pairs]
    features_train = [feat for _, feat in train_pairs]
    labels_test = [lbl for lbl, _ in test_pairs]
    features_test = [feat for _, feat in test_pairs]

    X = np.array(features_train)
    y = np.array(labels_train)
    X_test = np.array(features_test)

    regressor = SVR(kernel=KERNEL, C=C_VALUE)
    regressor.fit(X, y)
    predictions = regressor.predict(X_test)

    avgTime = getAvgTime(features_train)
    for idx, predicted in enumerate(predictions):
        sample_time = getTime(features_test[idx])
        ratio = 1 - (avgTime - sample_time) / avgTime
        # Three nested square roots: the eighth root of the averaged ratio.
        weight = math.sqrt(math.sqrt(math.sqrt((ratio + ratio) / 2)))
        # No weighting is applied when mn_time is 2 or 4.
        if mn_time == 2 or mn_time == 4:
            weight = 1
        printResult(fw, labels_test[idx], predicted * weight, mn_time)
def build_ensemble(kls, **kwargs):
    """Build a ``kls`` ensemble with four SVR learners and an SVR meta learner.

    ``kwargs`` are forwarded to the ``kls`` constructor.
    """
    ensemble = kls(**kwargs)

    base_learners = []
    for _ in range(4):
        base_learners.append(SVR())
    ensemble.add(base_learners)

    ensemble.add_meta(SVR())
    return ensemble
def build_ensemble(kls, **kwargs):
    """Build a ``kls`` ensemble with five mixed learners and a Lasso meta learner.

    ``kwargs`` are forwarded to the ``kls`` constructor.
    """
    ensemble = kls(**kwargs)

    base_learners = [
        SVR(),
        RandomForestRegressor(),
        GradientBoostingRegressor(),
        Lasso(copy_X=False),
        MLPRegressor(shuffle=False, alpha=0.001),
    ]
    ensemble.add(base_learners)

    meta_learner = Lasso(copy_X=False)
    ensemble.add_meta(meta_learner)
    return ensemble
def spot_check(X, y):
    """Score a fixed set of regression models on ``(X, y)`` and print each score.

    Each model is scored with ``check_model(model, 5, X, y)``; all models are
    scored before any result line is printed.
    """
    # NOTE(review): `type` is not defined in this function; it resolves to a
    # module-level name (or the builtin). Confirm what it is meant to hold.
    if type != 'regression':
        return

    candidates = [
        (LinearRegression(), 'Ordinary Least Squares'),
        (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
        (Ridge(), 'Ridge (alpha 1.0)'),
        (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
        (Lasso(), 'Lasso (alpha 1.0)'),
        (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
        (ElasticNet(), 'ElasticNet (alpha 1.0)'),
        (DecisionTreeRegressor(), 'Decision Tree'),
        (KNeighborsRegressor(), 'K-Nearest Neighbors'),
    ]

    n_splits = 5
    results = [check_model(estimator, n_splits, X, y) for estimator, _ in candidates]

    for (_, label), result in zip(candidates, results):
        print('%s: %f' % (label, result))
def get_classifier(self, X, Y):
    """Fit a linear-kernel SVR on the given data and return it.

    :param X: first argument passed to ``fit``
    :param Y: second argument passed to ``fit``
    :return: the SVR instance after ``fit`` has been called on it
    """
    regressor = SVR(kernel='linear')
    regressor.fit(X, Y)
    return regressor