def train(self):
    """Truncate logerror outliers, fit a Ridge model, and pickle it."""
    start = time.time()
    print('size before truncating outliers is %d' % len(self.TrainData))
    TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
    print('size after truncating outliers is %d' % len(TrainData))

    X = TrainData.drop(self._l_drop_cols, axis=1)
    Y = TrainData['logerror']
    self._l_train_columns = X.columns
    X = X.values.astype(np.float32, copy=False)

    rr = Ridge(alpha=self._alpha,
               max_iter=self._iter,
               solver='svd')
    self._model = rr.fit(X, Y)
    end = time.time()
    print('time consumed %d seconds' % (end - start))

    self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                        datetime.now().strftime('%Y%m%d-%H:%M:%S'))
    with open(self._f_eval_train_model, 'wb') as o_file:
        pickle.dump(self._model, o_file, -1)

    ## ignore_index=True resets the index; otherwise the two frames' indices would overlap
    self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
                               ignore_index=True)
    return
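Once train() has run, the pickled model can be restored for evaluation; a minimal sketch, assuming model_path holds the .pkl filename written above and X_valid is a hypothetical held-out feature matrix:

import pickle

# reload the Ridge model persisted by train()
with open(model_path, 'rb') as i_file:
    model = pickle.load(i_file)
preds = model.predict(X_valid)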
Python Ridge() example source code
def predict(self):
    """
    Train the regression model with predictions on validation set.
    Save the learned weights to apply to test set predictions.
    """
    pred_array = np.stack(self.pred_list, -1)
    reg = linear_model.Ridge(alpha=.5)
    pred = np.reshape(pred_array, [-1, len(self.pred_list)])
    y = np.reshape(self.labels_val, [-1, 1])
    reg.fit(pred, y)
    self.weights = reg.coef_[0].tolist()
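The learned coefficients can then blend the test-set predictions; a hedged sketch (only self.weights comes from the snippet above, the test-side names are assumptions):

import numpy as np

test_preds = np.stack(pred_list_test, -1)  # pred_list_test: hypothetical per-model test predictions
blended = test_preds.reshape(-1, len(pred_list_test)).dot(np.array(weights))  # weights from predict()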
def ridge_regression_model(parameter_array):
    alpha_value = parameter_array[0]
    # ridge_solver = parameter_array[0]
    # note: the normalize argument was removed in scikit-learn 1.2;
    # on modern versions, scale the inputs in a Pipeline instead
    return linear_model.Ridge(alpha=alpha_value, fit_intercept=True, normalize=True, copy_X=True, max_iter=None, tol=0.001, solver='auto', random_state=None)

# Returns the ridge regression model
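A quick usage sketch with synthetic data (illustrative only; runs on scikit-learn versions that still accept the normalize flag):

import numpy as np
from sklearn import linear_model

X = np.random.rand(100, 5)
y = X @ np.arange(1.0, 6.0) + 0.1 * np.random.randn(100)
model = ridge_regression_model([0.5])  # parameter_array[0] is the alpha value
model.fit(X, y)
print(model.coef_)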
Source: linearregressionmodel.py — project: Supply-demand-forecasting, author: LevinJ
def setClf(self):
    # self.clf = Ridge(alpha=0.0000001, tol=0.0000001)
    clf = LinearRegression()
    min_max_scaler = preprocessing.MinMaxScaler()
    self.clf = Pipeline([('scaler', min_max_scaler), ('estimator', clf)])
    return
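Wrapping the scaler and estimator in one Pipeline means the MinMaxScaler is re-fit on each training fold during cross-validation, avoiding train/test leakage; a brief standalone sketch, assuming a feature matrix X and target y:

from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

clf = Pipeline([('scaler', preprocessing.MinMaxScaler()), ('estimator', LinearRegression())])
scores = cross_val_score(clf, X, y, cv=5)  # scaler fit per fold: no leakage into validation data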
def __init__(self, info, verbose=True, debug_mode=False):
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    self.postprocessor = None
    # self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)  # To calibrate proba
    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return
    if info['task'] == 'regression':
        if info['is_sparse']:
            self.name = "BaggingRidgeRegressor"
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1, max_depth=4, min_samples_split=14, verbose=verbose, warm_start=True)
        self.predict_method = self.model.predict  # For regression, predict values directly
    else:
        if info['has_categorical']:  # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
        else:
            self.name = "GradientBoostingClassifier"
            self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start=True)")
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba
def update_sparse_predictions(Y, D, W, Psi, lda=0.0001):
    X = np.zeros((Psi.shape[0], W.shape[1]))
    for i in range(W.shape[1]):
        used = (W[:, i] != 0)
        if used.sum() > 0:
            d = np.copy(D)
            d = d[:, used]
            model = Ridge(alpha=lda)
            model.fit(d, Y[:, i])
            X[:, i] = model.predict(Psi[:, used])
    return X
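A self-contained usage sketch with synthetic shapes (all data here is illustrative; the function assumes numpy and sklearn's Ridge are imported):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
D = rng.randn(50, 20)                               # dictionary evaluated at training points
W = np.zeros((20, 3)); W[:5, :] = rng.randn(5, 3)   # sparse coefficients per column
Y = D @ W + 0.01 * rng.randn(50, 3)                 # observations
Psi = rng.randn(30, 20)                             # dictionary evaluated at new points
X_new = update_sparse_predictions(Y, D, W, Psi)     # (30, 3) refitted predictions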
def __init__(self, mu=.5, tau=1.0, lamda=1, use_gpu=False, threshold=1e-16,
             alpha=None, l1_ratio=None, fit_intercept=True,
             normalize=False, precompute=False, max_iter=10000,
             copy_X=True, tol=1e-4, warm_start=False, positive=False,
             random_state=None, selection='cyclic'):
    vs = L1L2(mu=mu, tau=tau, use_gpu=use_gpu, threshold=threshold,
              alpha=alpha, l1_ratio=l1_ratio, fit_intercept=fit_intercept,
              normalize=normalize, precompute=precompute,
              max_iter=max_iter, copy_X=copy_X, tol=tol,
              warm_start=warm_start, positive=positive,
              random_state=random_state, selection=selection)
    mdl = Ridge(alpha=lamda, fit_intercept=fit_intercept,
                normalize=normalize, copy_X=copy_X, max_iter=max_iter,
                tol=tol, random_state=random_state)
    super(L1L2TwoStep, self).__init__(
        (('l1l2', vs), ('ridge', mdl)))
    self.mu = mu
    self.tau = tau
    self.lamda = lamda
    self.alpha = alpha
    self.l1_ratio = l1_ratio
    self.use_gpu = use_gpu
    self.threshold = threshold
    self.fit_intercept = fit_intercept
    self.normalize = normalize
    self.precompute = precompute
    self.max_iter = max_iter
    self.copy_X = copy_X
    self.tol = tol
    self.warm_start = warm_start
    self.positive = positive
    self.intercept_ = 0.0
    self.random_state = random_state
    self.selection = selection
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    def build_model(model_name):
        model = model_name()
        return model
    # for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        # print(resid)
        print("Mean of squared residuals: %f" % np.mean(resid ** 2))
        # print(model.predict(TestX))
        # print(TestY)
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        # plt.xlim([1, 50])
        plt.show()
        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1, X2, X3, X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = stats.diagnostic.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)
        # note: in newer statsmodels acorr_ljungbox returns a DataFrame;
        # there, read the 'lb_pvalue' column instead of ljung_box[1]
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        # print("Lagrange Multiplier Statistics:", ljung_box[0])
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def lsClassifier(trainData, trainLabel, testData, testLabel, lambdaS):
    reg = linear_model.Ridge(alpha=lambdaS)
    reg.fit(trainData, trainLabel.tolist())
    W = reg.coef_
    # note: classification uses the coefficients only; the fitted intercept is ignored
    testResult = np.array(testData.dot(W))
    testResult = np.where(testResult > 0, 1, -1).astype(np.int32)
    accu = np.sum(np.where(testResult == testLabel, 1, 0)) / float(testLabel.shape[0])
    return testResult, accu
def __init__(self, isTrain):
    super(RegressionRidgeReg, self).__init__(isTrain)
    # data preprocessing
    # self.dataPreprocessing()
    # Create linear regression object
    self.model = linear_model.Ridge(alpha=24420.530945486549)
def localupdate(b, A, z, u, rho, eps):
    ridge = Ridge(alpha=rho / 2.0, fit_intercept=False, tol=eps)
    # print("b", b)
    # print("z", z)
    # print("u", u)
    # print(A * (z - u / rho))
    b_new = b - A * (z - u / rho)
    # print("bnew", b_new)
    ret = ridge.fit(A, b_new)
    # print(ret)
    # print(ret.coef_)
    return (ret.coef_ + (z - u / rho))
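For reference, Ridge(alpha=rho/2, fit_intercept=False) minimizes ||A x - b_new||^2 + (rho/2)||x||^2, whose closed form is (A'A + (rho/2)I)^-1 A'b_new; a quick consistency check (the shapes here are illustrative):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(1)
A = rng.randn(40, 8)
b = rng.randn(40)
rho = 2.0
# closed-form ridge solution vs sklearn's solver
coef_closed = np.linalg.solve(A.T @ A + (rho / 2) * np.eye(8), A.T @ b)
coef_ridge = Ridge(alpha=rho / 2, fit_intercept=False).fit(A, b).coef_
assert np.allclose(coef_closed, coef_ridge, atol=1e-6)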
def get_next_by_EI(ni, alpha, lr, lr_time, X, y, ei_xi):
    '''
    Args:
        ni: number of units in each layer
        alpha: lambda for Ridge regression
        lr: fitted performance model from the burn-in period
        lr_time: fitted time model from the burn-in period
        X: all previous inputs x
        y: all previous observations corresponding to X
        ei_xi: parameter for the EI exploitation-exploration trade-off
    Returns:
        x_next: a nested list [[0,1,0], [1,0,0,0], ...] as the next input x to run a specified pipeline
    '''
    var = np.var(lr.predict(X) - y)
    m = np.dot(X.T, X)
    inv = np.linalg.inv(m + alpha * np.eye(sum(ni)))
    maxEI = float('-inf')
    x_next = None
    for i in range(np.prod(ni)):
        x = [[0] * n for n in ni]
        x_flat = []
        pipeline = get_pipeline_by_flatten_index(ni, i)
        for layer in range(len(ni)):
            x[layer][pipeline[layer]] = 1
            x_flat += x[layer]
        x_flat = np.array(x_flat)
        mu_x = lr.predict([x_flat])
        var_x = var * (1 + np.dot(np.dot(x_flat, inv), x_flat.T))
        sigma_x = np.sqrt(var_x)
        u = (np.min(y) - ei_xi - mu_x) / sigma_x
        EI = sigma_x * (u * norm.cdf(u) + norm.pdf(u))
        estimated_time = lr_time.predict([x_flat])[0]
        EIPS = EI / estimated_time  # expected improvement per second
        if EIPS > maxEI:
            maxEI = EIPS
            x_next = x
    return x_next
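The helper get_pipeline_by_flatten_index is not shown here; a minimal sketch consistent with its use above (the mixed-radix decoding is an assumption):

def get_pipeline_by_flatten_index(ni, index):
    # decode a flat index into one component choice per layer
    pipeline = []
    for n in ni:
        pipeline.append(index % n)
        index //= n
    return pipeline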
def main(dataset_size, test_proportion):
    diabetes = load_diabetes()
    X = diabetes.data[:dataset_size]
    y = diabetes.target[:dataset_size]
    fig, ax_list = plt.subplots(3, 1, figsize=(8, 6))
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Ridge, ax=ax_list[0])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=Lasso, ax=ax_list[1])
    plot_errors_by_lambda(X, y, test_proportion=test_proportion, regression_class=LinearRegression, ax=ax_list[2])
    plt.tight_layout()
    plt.show()
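plot_errors_by_lambda is not defined in this snippet; one plausible sketch (the lambda grid, split, and error metric are assumptions; LinearRegression takes no alpha, so it is special-cased):

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

def plot_errors_by_lambda(X, y, test_proportion, regression_class, ax):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_proportion, random_state=0)
    lambdas = np.logspace(-3, 2, 30)
    errors = []
    for lam in lambdas:
        model = regression_class() if regression_class is LinearRegression else regression_class(alpha=lam)
        model.fit(X_tr, y_tr)
        errors.append(np.mean((model.predict(X_te) - y_te) ** 2))
    ax.plot(lambdas, errors)
    ax.set_xscale('log')
    ax.set_xlabel('lambda')
    ax.set_ylabel('test MSE')
    ax.set_title(regression_class.__name__)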
def get_models4ensamble(conf):
    models = []
    # models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    # models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html
    # 0 was too big to run with depth set to 1, and 1 was overfitting a bit
    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 3, "eta": 0.1, "min_child_weight": 5,
                      "subsample": 0.5, "nthread": 4, "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}
    else:
        xgb_params = {"objective": "reg:linear", "booster": "gbtree", "max_depth": 10, "eta": 0.1, "min_child_weight": 8,
                      "subsample": 0.5, "nthread": 4, "colsample_bytree": 0.5, "num_parallel_tree": 1, 'gamma': 0}
    models = [
        # DLModel(conf),
        # LRModel(conf, model=linear_model.BayesianRidge()),
        # LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
        # LRModel(conf, model=linear_model.Lasso(alpha=0.1)),
        # LRModel(conf, model=linear_model.Ridge(alpha=.5)),
        # LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
        #                               ('linear', LinearRegression(fit_intercept=False))])),
        XGBoostModel(conf, xgb_params, use_cv=True),
        LRModel(conf, model=linear_model.Lasso(alpha=0.3)),
        RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
        # LRModel(conf, model=linear_model.Lasso(alpha=0.2)),
        ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
        # AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
    ]
    return models
    # return [XGBoostModel(conf, xgb_params, use_cv=True)]
def fc_kernel(X, Y, copy_X=True, W=None, B=None, ret_reg=False, fit_intercept=True):
    """
    return: n c
    """
    assert copy_X
    assert len(X.shape) == 2
    if dcfgs.ls == cfgs.solvers.gd:
        w = Worker()
        def wo():
            from .GDsolver import fc_GD
            a, b = fc_GD(X, Y, W, B, n_iters=1)
            return {'a': a, 'b': b}
        outputs = w.do(wo)
        return outputs['a'], outputs['b']
    elif dcfgs.ls == cfgs.solvers.tls:
        return tls(X, Y, debug=True)
    elif dcfgs.ls == cfgs.solvers.keras:
        _reg = keras_kernel()
        _reg.fit(X, Y, W, B)
        return _reg.coef_, _reg.intercept_
    elif dcfgs.ls == cfgs.solvers.lightning:
        # _reg = SGDRegressor(eta0=1e-8, intercept_decay=0, alpha=0, verbose=2)
        _reg = CDRegressor(n_jobs=-1, alpha=0, verbose=2)
        if 0:
            _reg.intercept_ = B
            _reg.coef_ = W
    elif dcfgs.fc_ridge > 0:
        _reg = Ridge(alpha=dcfgs.fc_ridge)
    else:
        _reg = LinearRegression(n_jobs=-1, copy_X=copy_X, fit_intercept=fit_intercept)
    _reg.fit(X, Y)
    if ret_reg:
        return _reg
    return _reg.coef_, _reg.intercept_
def ridge_train(X, y):
    alphas = [0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75]
    cv_ridge = [rmse_cv(Ridge(alpha=alpha), X, y).mean() for alpha in alphas]
    cv_ridge = pd.Series(cv_ridge, index=alphas)
    cv_ridge.plot(title="Validation - Just Do It")
    print('min cv is : ', cv_ridge.min())
    return alphas[cv_ridge.values.argmin()]
#%%
# ridge regression shrinks coefficients but never zeroes them out, so no feature is removed
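The rmse_cv helper this snippet relies on is not defined here; a common formulation (an assumption) is:

import numpy as np
from sklearn.model_selection import cross_val_score

def rmse_cv(model, X, y, cv=5):
    # cross-validated RMSE; sklearn reports negated MSE, so flip the sign
    mse = -cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=cv)
    return np.sqrt(mse)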
def spot_check(X, y, task='regression'):  # `task` replaces a bare `type` check in the original, which shadowed the builtin and never matched
    if task == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            # (RandomForestRegressor(), 'Random Forest Regressor'),
            # (BaggingRegressor(), 'Bagging Regressor'),
            # (GradientBoostingRegressor(), 'Gradient Boosted Regression'),
            # (SVR(), 'Support Vector Regression')
        ]
    splits = 5
    scores = []
    for model, model_name in models:
        score = check_model(model, splits, X, y)
        # get average score
        scores.append(score)
    model_names = map(lambda x: x[1], models)
    for name, score in zip(model_names, scores):
        print('%s: %f' % (name, score))
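check_model is also undefined in this snippet; a minimal sketch (the scoring choice is an assumption):

from sklearn.model_selection import cross_val_score

def check_model(model, splits, X, y):
    # mean cross-validated score (R^2 by default for regressors)
    return cross_val_score(model, X, y, cv=splits).mean()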
def get_classifier(self, X, Y):
    """Fit a ridge model on the training data.
    :param X: training features
    :param Y: training labels
    :return: fitted classifier
    """
    clf = Ridge()
    clf.fit(X, Y)
    return clf
def ridge_regression(data, a):
    features = data.columns.tolist()
    features.remove('label')
    response = ['label']
    # build the Ridge Regression model
    lr = Ridge(alpha=a)
    # response variable: label (kept as a DataFrame)
    y = data[response]
    # features (kept as a DataFrame)
    X = data[features]
    # _leave_one_out(lr, X.values, y.values)
    # fit regression model to the data
    model = lr.fit(X, y)
    # predict with the fitted model
    predicted_y = model.predict(X)  # predicted_y is a numpy array
    # convert y from a DataFrame to a numpy array to match predicted_y
    y = np.array(y)
    # report metrics
    _print_y_and_predicted_y_and_corr(y, predicted_y)
    _print_r2_score(y, predicted_y)
    _print_coefficients(model, features, '~/Desktop/??_???_lt30.csv')
    _print_MSE(y, predicted_y)
    plot_true_and_pred_scatter(y, predicted_y)
    # std_error(y, predicted_y)
def _load_model(self, model_id):
    _, conn = get_engine()
    # todo
    models = {
        'QXgb': QXgb,
        'QXgb2': QXgb2,
        'Ridge': Ridge,
        'RidgeClassifier': RidgeClassifier,
        'KNeighborsClassifier': KNeighborsClassifier,
        'QAvg': QAvg,
        'QRankedAvg': QRankedAvg,
        'QRankedByLineAvg': QRankedByLineAvg,
        'QStackModel': QStackModel,
        'LogisticRegression': LogisticRegression,
        'DecisionTreeClassifier': DecisionTreeClassifier,
        'QPostProcessingModel': QPostProcessingModel,
        'RandomForestClassifier': RandomForestClassifier,
        'ExtraTreesClassifier': ExtraTreesClassifier,
        'QAvgOneModelData': QAvgOneModelData,
        'QNN1': QNN1,
        'QNN2': QNN2,
    }
    res = conn.execute(
        """
        select cls, params, descr, predict_fn
        from qml_models
        where
            model_id='{}'
        """.format(model_id)
    ).fetchone()
    if not res:
        raise Exception('Missing {} model'.format(model_id))
    model = models[res['cls']](**json.loads(res['params']))
    self.add(model_id, model, res['descr'], res['predict_fn'])
    return model
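Interpolating model_id into the SQL string invites injection if the id ever comes from user input; a safer sketch using SQLAlchemy's bound parameters (assuming conn is a SQLAlchemy connection, as the get_engine() call suggests):

from sqlalchemy import text

res = conn.execute(
    text("select cls, params, descr, predict_fn from qml_models where model_id = :mid"),
    {"mid": model_id},
).fetchone()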