def __remodel__(self, model_type, regr, __X_train, __Y_train):
    """
    Function to retrain certain models based on optimal alphas and/or ratios
    """
    if model_type == "ridge":
        alpha = regr.alpha_
        regr = linear_model.RidgeCV(alphas=self.__realpha__(alpha), cv=10)
    elif model_type == "lasso":
        alpha = regr.alpha_
        regr = linear_model.LassoCV(alphas=self.__realpha__(alpha), max_iter=5000, cv=10)
    elif model_type == "elasticnet":
        alpha = regr.alpha_
        ratio = regr.l1_ratio_
        regr = linear_model.ElasticNetCV(l1_ratio=self.__reratio__(ratio),
                                         alphas=self.__elasticnet_init["alpha"],
                                         max_iter=1000, cv=3)
    regr.fit(__X_train, __Y_train)
    return regr
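The grid-refinement helpers __realpha__ and __reratio__ are not shown here. A minimal sketch of what such a helper could look like, assuming it simply builds a tighter, log-spaced grid of candidate alphas around the optimum selected by the previous cross-validated fit (the spacing and number of points are assumptions):

import numpy as np

def __realpha__(self, alpha):
    # Hypothetical helper: return a denser, log-spaced grid of candidate
    # alphas centred on the alpha chosen by the previous CV fit.
    return np.logspace(np.log10(alpha) - 0.5, np.log10(alpha) + 0.5, num=11)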
Python ElasticNetCV() usage examples

Source file: regression.py
Project: Building-Machine-Learning-Systems-With-Python-Second-Edition
Author: PacktPublishing
def predict(train):
    binary = (train > 0)
    reg = ElasticNetCV(fit_intercept=True, alphas=[
        0.0125, 0.025, 0.05, .125, .25, .5, 1., 2., 4.])
    norm = NormalizePositive()
    train = norm.fit_transform(train)
    filled = train.copy()
    # iterate over all users
    for u in range(train.shape[0]):
        # remove the current user for training
        curtrain = np.delete(train, u, axis=0)
        bu = binary[u]
        if np.sum(bu) > 5:
            reg.fit(curtrain[:, bu].T, train[u, bu])
            # Fill the values that were not there already
            filled[u, ~bu] = reg.predict(curtrain[:, ~bu].T)
    return norm.inverse_transform(filled)
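NormalizePositive is defined elsewhere in the same chapter and is not shown above. A minimal sketch of the idea, assuming per-user mean-centering over only the rated (positive) entries, with the means remembered so the transform can be undone (the book's actual axis handling may differ):

import numpy as np

class NormalizePositive:
    # Hypothetical sketch: mean-center each user's rated entries only,
    # remembering the per-user means for inverse_transform.
    def fit_transform(self, X):
        binary = X > 0
        counts = binary.sum(axis=1).astype(float)
        counts[counts == 0] = 1.0                 # avoid division by zero
        self.mean_ = X.sum(axis=1) / counts
        return np.where(binary, X - self.mean_[:, None], 0.0)

    def inverse_transform(self, X):
        return X + self.mean_[:, None]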
def train_EN_model(_train_x, train_y, _predict_x):
    print_title("ElasticNet")
    train_x, predict_x = standarize_feature(_train_x, _predict_x)
    # l1_ratios = [1e-4, 1e-3, 1e-2, 1e-1]
    # l1_ratios = [1e-5, 1e-4, 1e-3]
    l1_ratios = [0.9, 0.92, 0.95, 0.97, 0.99]
    # l1_ratios = [0.5]
    min_mse = 1
    for r in l1_ratios:
        t1 = time.time()
        reg_en = linear_model.ElasticNetCV(
            l1_ratio=r, cv=5, n_jobs=4, verbose=1, precompute=True)
        reg_en.fit(train_x, train_y)
        n_nonzeros = (reg_en.coef_ != 0).sum()
        # CV mean squared error at the selected alpha for this l1_ratio
        _mse = np.mean(reg_en.mse_path_, axis=1)[
            np.where(reg_en.alphas_ == reg_en.alpha_)[0][0]]
        if _mse < min_mse:
            min_mse = _mse
            best_l1_ratio = r
            best_alpha = reg_en.alpha_
        t2 = time.time()
        print("ratio(%e) -- n: %d -- alpha: %f -- mse: %f -- "
              "time: %.2f sec" %
              (r, n_nonzeros, reg_en.alpha_, _mse, t2 - t1))
    print("Best l1_ratio and alpha: %f, %f" % (best_l1_ratio, best_alpha))

    # refit with the best parameters and predict
    reg = linear_model.ElasticNet(l1_ratio=best_l1_ratio, alpha=best_alpha)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
def test_get_errors_param(self):
    """
    Test that we can get the CV errors for alpha selection from known models
    """
    # Test original CV models
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            model = AlphaSelection(model())
            X, y = make_regression()
            model.fit(X, y)
            errors = model._find_errors_param()
            self.assertTrue(len(errors) > 0)
        except YellowbrickValueError:
            self.fail("could not find errors on {}".format(model.name))
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    for model in (SVR, Ridge, Lasso, LassoLars, ElasticNet):
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())
    for model in (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV):
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def test_get_alphas_param(self):
    """
    Assert that we can get the alphas from ridge, lasso, and elasticnet
    """
    alphas = np.logspace(-10, -2, 100)
    # Test original CV models
    for model in (RidgeCV, LassoCV, ElasticNetCV):
        try:
            model = AlphaSelection(model(alphas=alphas))
            malphas = model._find_alphas_param()
            self.assertTrue(np.array_equal(alphas, malphas))
        except YellowbrickValueError:
            self.fail("could not find alphas on {}".format(model.name))
def predict(self, X):
    binary = X > 0
    if self.normalize:
        X = self.norm.fit_transform(X)
    num_users, num_movies = X.shape
    clf = ElasticNetCV(alphas=[0.1])
    predicted = X.copy()
    for user in range(num_users):
        # bool array for movies rated by this user
        movie_user = binary[user]
        # which users to consider as attributes for regression,
        # in this case all except the current user
        neighbors = np.ones((num_users), dtype=bool)
        neighbors[user] = False
        X_train_user = X[neighbors]
        X_train_user = X_train_user[:, movie_user].T
        y_train_user = X[user, movie_user]
        clf.fit(X_train_user, y_train_user)
        X_test_user = X[neighbors]
        X_test_user = X_test_user[:, ~movie_user].T
        predicted[user, ~movie_user] = clf.predict(X_test_user)
    if self.normalize:
        predicted = self.norm.inverse_transform(predicted)
    return predicted
def predict(self, demand_fixture_data, params=None, summed=True):
    ''' Predicts across index using fitted model params

    Parameters
    ----------
    demand_fixture_data : pandas.DataFrame
        Formatted input data as returned by
        :code:`ModelDataFormatter.create_demand_fixture()`
    params : dict, default None
        Parameters found during model fit. If None, :code:`.fit()` must be
        called before this method can be used.

        - :code:`X_design_info`: patsy design info used to rebuild the
          design matrix.
        - :code:`formula`: patsy formula used in creating the design matrix.
        - :code:`coefficients`: ElasticNetCV coefficients.
        - :code:`intercept`: ElasticNetCV intercept.

    Returns
    -------
    output : pandas.DataFrame
        Dataframe of energy values as given by the fitted model across the
        index given in :code:`demand_fixture_data`.
    '''
    if params is None:
        params = self.params

    design_info = params["X_design_info"]

    model_data = self._model_data_from_demand_fixture_data(
        demand_fixture_data)

    (X,) = patsy.build_design_matrices([design_info],
                                       model_data,
                                       return_type='dataframe')

    # Rebuild an estimator shell and restore the fitted coefficients
    model_obj = linear_model.ElasticNetCV(l1_ratio=self.l1_ratio,
                                          fit_intercept=False)
    model_obj.coef_ = np.array(params["coefficients"])
    model_obj.intercept_ = params["intercept"]

    try:
        predicted = pd.Series(model_obj.predict(X), index=X.index)
    except Exception:
        return np.nan, np.nan

    if summed:
        n = len(predicted)
        predicted = np.sum(predicted)
        stddev = self.error_fun(n)
        variance = stddev ** 2
        # Convert to 95% confidence limits
    else:
        # add NaNs back in
        predicted = predicted.reindex(model_data.index)
        variance = self.variance

    return predicted, variance
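The params dict read above is produced by the model's fit step, which is not shown. A minimal, self-contained sketch of how such a dict could be assembled; the function name, formula handling, and l1_ratio default are assumptions, not the project's actual API:

import numpy as np
import patsy
from sklearn import linear_model

def fit_and_store_params(formula, model_data, l1_ratio=0.5):
    # Hypothetical fit step: build the patsy design matrices, fit an
    # ElasticNetCV without an intercept (the formula can supply one),
    # and return the dict of parameters that predict() reads back.
    y, X = patsy.dmatrices(formula, model_data, return_type='dataframe')
    model_obj = linear_model.ElasticNetCV(l1_ratio=l1_ratio, fit_intercept=False)
    model_obj.fit(X, np.ravel(y))
    return {
        "X_design_info": X.design_info,
        "formula": formula,
        "coefficients": list(model_obj.coef_),
        "intercept": model_obj.intercept_,
    }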