def regression(filename):
    """Fit a linear regression on the data in *filename* and return its coefficients.

    The data is loaded with ``loadDataSet(filename)`` and split 75/25 into
    train and test parts (``random_state=1``). A ``LinearRegression`` model
    is fitted on the training part. MAE, MSE and RMSE on the held-out part,
    plus 3-fold cross-validation scores on the full data, are printed to
    stdout.

    Args:
        filename: Passed to ``loadDataSet``. The text before its first
            ``'.'`` is used as the row label of the returned frame.

    Returns:
        A one-row ``pandas.DataFrame`` holding the first
        ``len(feature_cols)`` fitted coefficients, with ``feature_cols``
        as column labels.

    Note:
        Building the frame needs a single-target model with at least
        ``len(feature_cols)`` coefficients; otherwise pandas is expected
        to reject the shape.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics

    X, y = loadDataSet(filename)
    print(filename, X.shape)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=1, test_size=0.25
    )

    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

    # Column labels paired with the fitted coefficients.
    # NOTE(review): the labels are runs of '?' -- presumably the original
    # non-ASCII feature names were lost to an encoding problem; restore
    # them from the original script if it is available.
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']

    y_pred = linreg.predict(X_test)
    print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
    # Compute the MSE once and reuse it for the RMSE.
    mse = metrics.mean_squared_error(y_test, y_pred)
    print("MSE:",mse)
    print('RMSE:',np.sqrt(mse))

    scores = cross_val_score(linreg, X, y,cv=3)
    print('scores:',scores)
    # With no explicit ``scoring`` the estimator's own ``score`` is used,
    # which for LinearRegression is R^2 rather than a classification
    # accuracy. The "Accuracy" label is kept so the printed output is
    # unchanged.
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # ``coef_`` is 1-D when the target is 1-D and (n_targets, n_features)
    # when it is 2-D. Normalise to 2-D so a single row is produced either
    # way, then keep only as many coefficients as there are labels.
    coefs = np.atleast_2d(linreg.coef_)[:, :len(feature_cols)]
    res = pd.DataFrame(coefs,columns=feature_cols,index=[filename.split('.')[0]])
    return res
#files = ['201603.xlsx','201604.xlsx','201605.xlsx','?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx','201703_06.xlsx']
#files = ['201703_06.xlsx']
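# (Web-page residue from the original post: "comment list" / "article table of contents" -- not part of the program.)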