def build_ensemble(**kwargs):
    """Build a two-layer SuperLearner ensemble.

    Layer one pairs each preprocessing pipeline with its own set of base
    estimators; layer two is a gradient-boosting meta learner.
    """
    ensemble = SuperLearner(**kwargs)
    preprocessing = {
        'Standard Scaling': [StandardScaler()],
        'Min Max Scaling': [MinMaxScaler()],
        'No Preprocessing': [],
    }
    base_estimators = {
        'Standard Scaling': [ElasticNet(), Lasso(), KNeighborsRegressor()],
        'Min Max Scaling': [SVR()],
        'No Preprocessing': [RandomForestRegressor(random_state=SEED),
                             GradientBoostingRegressor()],
    }
    ensemble.add(base_estimators, preprocessing)
    ensemble.add(GradientBoostingRegressor(), meta=True)
    return ensemble
Example source code using instances of the Python class ElasticNet()
def model_cross_valid(X, Y):
    """Print the mean 10-fold CV negative-MSE for each candidate regressor."""
    seed = 7
    kfold = model_selection.KFold(n_splits=10, random_state=seed)
    scoring = 'neg_mean_squared_error'
    # + random fest boost lstm gbdt
    candidates = [LinearRegression, ElasticNet]
    # Full battery (disabled):
    # [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor,
    #  DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor,
    #  GradientBoostingRegressor]
    for model_cls in candidates:
        estimator = model_cls()
        results = model_selection.cross_val_score(
            estimator, X, Y, cv=kfold, scoring=scoring)
        print(model_cls, results.mean())
def fit_enet(train_X, train_y, test_X):
    """
    Use linear regression to predict. Elastic net is LR with L1 and L2
    regularisation.
    :param train_X: training features
    :param train_y: training targets
    :param test_X: features to predict on
    :return: (model description string, train predictions, test predictions)
    """
    enet = ElasticNet()
    enet.fit(train_X, train_y)
    # BUG FIX: pprint() prints to stdout and returns None, so the model
    # description used to read "coefs None".  Embed the coefficients directly.
    model = "ElasticNet int %.2f coefs %s" % (enet.intercept_, enet.coef_)
    yhat_train = enet.predict(train_X)
    yhat_test = enet.predict(test_X)
    return model, yhat_train, yhat_test
def train_EN_model(_train_x, train_y, _predict_x):
    """Tune an ElasticNet over a grid of l1_ratio values via ElasticNetCV,
    then refit the best (l1_ratio, alpha) pair and predict.

    :param _train_x: raw training features (standardized internally)
    :param train_y: training targets
    :param _predict_x: raw features to predict on (standardized internally)
    :return: dict with test predictions ("y"), fitted training values
             ("train_y") and the final coefficient vector ("coef")
    """
    print_title("ElasticNet")
    train_x, predict_x = \
        standarize_feature(_train_x, _predict_x)
    #l1_ratios = [1e-4, 1e-3, 1e-2, 1e-1]
    #l1_ratios = [1e-5, 1e-4, 1e-3]
    l1_ratios = [0.9, 0.92, 0.95, 0.97, 0.99]
    #l1_ratios = [0.5]
    # BUG FIX: seeding min_mse with 1 left best_l1_ratio/best_alpha unbound
    # whenever every candidate's CV MSE was >= 1, crashing at the print below.
    min_mse = float("inf")
    for r in l1_ratios:
        t1 = time.time()
        reg_en = linear_model.ElasticNetCV(
            l1_ratio=r, cv=5, n_jobs=4, verbose=1, precompute=True)
        reg_en.fit(train_x, train_y)
        n_nonzeros = (reg_en.coef_ != 0).sum()
        # CV MSE of the alpha that ElasticNetCV selected for this l1_ratio.
        _mse = np.mean(reg_en.mse_path_, axis=1)[
            np.where(reg_en.alphas_ == reg_en.alpha_)[0][0]]
        if _mse < min_mse:
            min_mse = _mse
            best_l1_ratio = r
            best_alpha = reg_en.alpha_
        t2 = time.time()
        print("ratio(%e) -- n: %d -- alpha: %f -- mse: %f -- "
              "time: %.2f sec" %
              (r, n_nonzeros, reg_en.alpha_, _mse, t2 - t1))
    print("Best l1_ratio and alpha: %f, %f" % (best_l1_ratio, best_alpha))
    # predict_model
    reg = linear_model.ElasticNet(l1_ratio=best_l1_ratio, alpha=best_alpha)
    reg.fit(train_x, train_y)
    predict_y = reg.predict(predict_x)
    train_y_pred = reg.predict(train_x)
    return {"y": predict_y, "train_y": train_y_pred, "coef": reg.coef_}
cv_ElasticNet.py source file
Project: PySAT_Point_Spectra_GUI
Author: USGS-Astrogeology
Project source | File source | Views: 25 | Stars: 0 | Likes: 0 | Comments: 0
def connectWidgets(self):
    """Populate the ElasticNet widget controls with default values.

    A throwaway ElasticNet() instance supplies the scikit-learn defaults
    (fit_intercept, normalize, max_iter, tol, warm_start, positive); the
    alpha search range and the l1_ratio candidates are hard-coded here.
    """
    en = ElasticNet()
    # Hard-coded alpha search range: 1e-7 .. 1e-2 over 100 candidate values.
    self.minalpha_spin.setValue(0.0000001)
    self.maxalpha_spin.setValue(0.01)
    self.nalpha_spin.setValue(100)
    # Comma-separated l1_ratio candidates offered to the user.
    self.enl1_ratioLineEdit.setText('0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1.0')
    # List widgets: select the item whose text equals the stringified default.
    self.enfit_intercept_list.setCurrentItem(self.enfit_intercept_list.findItems(str(en.fit_intercept),QtCore.Qt.MatchExactly)[0])
    self.ennormalize_list.setCurrentItem(self.ennormalize_list.findItems(str(en.normalize),QtCore.Qt.MatchExactly)[0])
    #self.enprecomputeCheckBox.setChecked(en.precompute)
    self.enmax_iterLineEdit.setText(str(en.max_iter))
    #self.encopy_XCheckBox.setChecked(en.copy_X)
    self.entolLineEdit.setText(str(en.tol))
    self.enwarm_start_list.setCurrentItem(self.enwarm_start_list.findItems(str(en.warm_start),QtCore.Qt.MatchExactly)[0])
    self.enpositive_list.setCurrentItem(self.enpositive_list.findItems(str(en.positive),QtCore.Qt.MatchExactly)[0])
    #self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
    #self.defaultComboItem(self.enselectionComboBox, en.selection)
def define_model(self):
    """Instantiate the regressor selected by ``self.modeltype``.

    Hyper-parameters are taken from ``self.parameters``; only the keys
    needed by the chosen model are read.  Raises ConfigError for an
    unsupported model name.  (AR, StaticModel, AdvancedStaticModel and
    SGDRegressor support has been disabled.)
    """
    params = self.parameters
    factories = {
        "RandomForest": lambda: ensemble.RandomForestRegressor(
            n_estimators=params['n_estimators']),
        "LinearRegression": lambda: linear_model.LinearRegression(),
        "Lasso": lambda: linear_model.Lasso(
            alpha=params['alpha']),
        "ElasticNet": lambda: linear_model.ElasticNet(
            alpha=params['alpha'],
            l1_ratio=params['l1_ratio']),
        "SVR": lambda: SVR(
            C=params['C'],
            epsilon=params['epsilon'],
            kernel=params['kernel']),
    }
    try:
        factory = factories[self.modeltype]
    except KeyError:
        raise ConfigError("Unsupported model {0}".format(self.modeltype))
    return factory()
def test_n_clusters():
    """Exercise Birch's n_clusters: int, clusterer instance, bad estimator,
    and the small-threshold warning."""
    X, y = make_blobs(n_samples=100, centers=10)
    # An integer n_clusters must cap the number of final labels at 10.
    birch_int = Birch(n_clusters=10)
    birch_int.fit(X)
    assert_greater(len(birch_int.subcluster_centers_), 10)
    assert_equal(len(np.unique(birch_int.labels_)), 10)
    # An AgglomerativeClustering instance as the global step must give the
    # same result as the integer form.
    agg = AgglomerativeClustering(n_clusters=10)
    birch_est = Birch(n_clusters=agg)
    birch_est.fit(X)
    assert_array_equal(birch_int.subcluster_labels_, birch_est.subcluster_labels_)
    assert_array_equal(birch_int.labels_, birch_est.labels_)
    # A non-clusterer for the global clustering step must raise.
    bad = Birch(n_clusters=ElasticNet())
    assert_raises(ValueError, bad.fit, X)
    # A huge threshold yields too few subclusters and must warn.
    coarse = Birch(threshold=10000.)
    assert_warns(UserWarning, coarse.fit, X)
def model_fit_and_test(TrainX, TrainY, TestX, TestY):
    """Fit each candidate regressor, plot its residuals, and run residual
    diagnostics (normality, heteroskedasticity, autocorrelation) on the
    test set.

    :param TrainX: training features
    :param TrainY: training targets
    :param TestX: test features
    :param TestY: test targets
    """
    def bulid_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = bulid_model(model_name)
        model.fit(TrainX, TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        print("Residual sum of squares: %f" % np.mean(resid ** 2))
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid)
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()
        print('Variance score: %.2f' % model.score(TestX, TestY))
        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print("Test Residuals Normal", pvalue)
        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        # BUG FIX: the old misspelled alias het_breushpagan was removed from
        # statsmodels; the correct name is het_breuschpagan.
        _, pvalue1, _, _ = stats.diagnostic.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)
        # BUG FIX: these were Python 2 print *statements* — a SyntaxError under
        # Python 3, which the rest of this file targets.
        print("Test Autocorrelation P-values:", ljung_box[1])
        if any(ljung_box[1] < 0.05):
            print("The residuals are autocorrelated.")
        else:
            print("The residuals are not autocorrelated.")
def build_ensemble(**kwargs):
    """Generate a two-layer SuperLearner: linear models, then KNN."""
    ensemble = SuperLearner(**kwargs)
    # copy_X=False avoids duplicating the training matrix for each learner.
    linear_layer = [ElasticNet(copy_X=False),
                    Lasso(copy_X=False)]
    ensemble.add(linear_layer)
    ensemble.add(KNeighborsRegressor())
    return ensemble
def get_regression_coefs(self, category, clf=None):
    ''' Computes regression score of tdfidf transformed features

    Parameters
    ----------
    category : str
        category name to score
    clf : sklearn regressor, optional
        Defaults to a fresh ElasticNet().  (BUG FIX: the old default
        ``clf=ElasticNet()`` was a mutable default argument — one shared
        instance was re-fitted by every call, leaking state between calls.)

    Returns
    -------
    coefficient array
    '''
    if clf is None:
        clf = ElasticNet()
    self._fit_tfidf_model(category, clf)
    return clf.coef_
def spot_check(X, y, problem_type='regression'):
    """Cross-validate a battery of regressors and print each mean score.

    BUG FIX: the original guard was ``if type == 'regression':`` which
    compares the *builtin* ``type`` to a string — always False — so
    ``models`` was never bound and the function crashed with NameError.
    The flag is now an explicit keyword parameter (default 'regression',
    keeping the call signature backward-compatible).

    :param X: feature matrix
    :param y: target vector
    :param problem_type: only 'regression' is currently implemented
    """
    models = []
    if problem_type == 'regression':
        models = [
            (LinearRegression(), 'Ordinary Least Squares'),
            (Ridge(alpha=0.1), 'Ridge (alpha 0.1)'),
            (Ridge(), 'Ridge (alpha 1.0)'),
            (Lasso(alpha=0.1), 'Lasso (alpha 0.1)'),
            (Lasso(), 'Lasso (alpha 1.0)'),
            (ElasticNet(alpha=0.1), 'ElasticNet (alpha 0.1)'),
            (ElasticNet(), 'ElasticNet (alpha 1.0)'),
            (DecisionTreeRegressor(), 'Decision Tree'),
            (KNeighborsRegressor(), 'K-Nearest Neighbors'),
            # (RandomForestRegressor(), 'Random Forest Regressor'),
            # (BaggingRegressor(), 'Bagging Regressor'),
            # (GradientBoostingRegressor(), 'Gradient Bosted Regression'),
            # (SVR(), 'Support Vector Regression')
        ]
    splits = 5
    scores = []
    for model, model_name in models:
        # get average score
        scores.append(check_model(model, splits, X, y))
    for (_, name), score in zip(models, scores):
        print('%s: %f' % (name, score))
def test_regressor_cv(self):
    """
    Ensure only "CV" regressors are allowed
    """
    # Plain (non-CV) regressors must be rejected by AlphaSelection.
    rejected = (SVR, Ridge, Lasso, LassoLars, ElasticNet)
    for model in rejected:
        with self.assertRaises(YellowbrickTypeError):
            alphas = AlphaSelection(model())
    # CV variants must be accepted without raising.
    accepted = (RidgeCV, LassoCV, LassoLarsCV, ElasticNetCV)
    for model in accepted:
        try:
            alphas = AlphaSelection(model())
        except YellowbrickTypeError:
            self.fail("could not instantiate RegressorCV on alpha selection")
def connectWidgets(self):
    """Initialise the ElasticNet and ElasticNetCV widget groups.

    Throwaway estimator instances (en / encv) are used purely as a source
    of scikit-learn's default hyper-parameter values, so the UI stays in
    sync with the installed sklearn version.
    """
    # The CV-specific group starts hidden until CV mode is selected.
    self.elasticNetCVGroupBox.setHidden(True)
    en = ElasticNet()
    encv = ElasticNetCV()
    # --- ElasticNet (non-CV) controls, all seeded from en's defaults ---
    self.alpha_text.setText(str(en.alpha))
    self.enl1_ratioDoubleSpinBox.setValue(en.l1_ratio)
    self.enfit_interceptCheckBox.setChecked(en.fit_intercept)
    self.ennormalizeCheckBox.setChecked(en.normalize)
    self.enprecomputeCheckBox.setChecked(en.precompute)
    self.enmax_iterSpinBox.setValue(en.max_iter)
    self.encopy_XCheckBox.setChecked(en.copy_X)
    self.entolDoubleSpinBox.setValue(en.tol)
    self.enwarm_startCheckBox.setChecked(en.warm_start)
    self.enpositiveCheckBox.setChecked(en.positive)
    self.setComboBox(self.enselectionComboBox, ['cyclic', 'random'])
    self.defaultComboItem(self.enselectionComboBox, en.selection)
    # --- ElasticNetCV controls, seeded from encv's defaults ---
    self.l1_ratioDoubleSpinBox.setValue(encv.l1_ratio)
    self.epsDoubleSpinBox.setValue(encv.eps)
    self.n_alphasSpinBox.setValue(encv.n_alphas)
    # The alphas default is None (automatic grid); shown as literal text.
    self.alphasLineEdit.setText('None')
    self.fit_interceptCheckBox.setChecked(encv.fit_intercept)
    self.normalizeCheckBox.setChecked(encv.normalize)
    self.setComboBox(self.precomputeComboBox, ['True', 'False', 'auto', 'array-like'])
    self.defaultComboItem(self.precomputeComboBox, encv.precompute)
    self.max_iterSpinBox.setValue(encv.max_iter)
    self.tolDoubleSpinBox.setValue(encv.tol)
    # NOTE(review): cv is hard-coded to 3 rather than read from encv —
    # presumably the historical sklearn default; confirm this is intended.
    self.cVSpinBox.setValue(3)
    self.copy_XCheckBox.setChecked(encv.copy_X)
    self.verboseCheckBox.setChecked(encv.verbose)
    self.n_jobsSpinBox.setValue(encv.n_jobs)
    self.positiveCheckBox.setChecked(encv.positive)
    self.setComboBox(self.selectionComboBox, ['cyclic', 'random'])
    self.defaultComboItem(self.selectionComboBox, encv.selection)
def build_model(train_file, attr_file, model_out, algorithm='ridge'):
    """Train the chosen regressor on a pickled DataFrame and pickle the model.

    :param train_file: path to a pickled pandas DataFrame of training data
    :param attr_file: attribute list file; first attr is the target, the
        rest are features
    :param model_out: path the fitted model is pickled to
    :param algorithm: one of 'ridge', 'linear', 'lasso', 'rf', 'en'
    :raises NotImplementedError: for an unknown algorithm name
    """
    classifiers = ['ridge', 'linear', 'lasso', 'rf', 'en']
    if algorithm not in classifiers:
        raise NotImplementedError("only implemented algorithms: " + str(classifiers))
    train_data = pd.read_pickle(train_file)
    attrs = read_attrs(attr_file)
    target_attr = attrs[0]
    usable_attrs = attrs[1:]
    if algorithm == 'ridge':
        clf = Ridge()
    elif algorithm == 'linear':
        clf = LinearRegression()
    elif algorithm == 'lasso':
        clf = Lasso()
    elif algorithm == 'en':
        clf = ElasticNet()
    else:
        clf = RandomForestRegressor()
    logger.debug("Modeling '%s'", target_attr)
    logger.debug("  train set (%d): %s", len(train_data), train_file)
    logger.debug("  Algorithm: %s", algorithm)
    clf.fit(train_data[usable_attrs], train_data[target_attr])
    # BUG FIX: coef_ only exists *after* fitting, so this logging block was
    # dead code when placed before clf.fit(); the old call also passed a
    # stray positional arg that broke the logger's %-formatting.
    if hasattr(clf, 'coef_'):
        logger.debug('Coefficients:')
        for attr, coef in zip(usable_attrs, clf.coef_):
            logger.debug('  %-20s: %20.4f', attr, coef)
    # Close the output file deterministically instead of leaking the handle.
    with open(model_out, 'wb') as fh:
        pickle.dump(clf, fh)
def gs_ElasticNet( xM, yV, en_params):
    """Grid-search ElasticNet over ``en_params`` with shuffled 5-fold CV."""
    print(xM.shape, yV.shape)
    estimator = linear_model.ElasticNet()
    cv_splitter = model_selection.KFold(n_splits=5, shuffle=True)
    search = model_selection.GridSearchCV(
        estimator, en_params, scoring='r2', cv=cv_splitter, n_jobs=-1)
    search.fit(xM, yV)
    return search
def gs_ElasticNet( xM, yV, en_params):
    """Grid-search ElasticNet over ``en_params`` with shuffled 5-fold CV.

    BUG FIX: ``model_selection.KFold`` takes ``n_splits``; ``n_folds`` was
    the old ``cross_validation.KFold`` keyword and raises TypeError here.
    """
    print(xM.shape, yV.shape)
    clf = linear_model.ElasticNet()
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(clf, en_params, scoring='r2', cv=kf5, n_jobs=-1)
    gs.fit(xM, yV)
    return gs
def gs_ElasticNet( xM, yV, en_params):
    """Return a fitted GridSearchCV over ElasticNet (r2, 5-fold shuffled)."""
    print(xM.shape, yV.shape)
    kfold = model_selection.KFold(n_splits=5, shuffle=True)
    grid = model_selection.GridSearchCV(linear_model.ElasticNet(),
                                        en_params,
                                        scoring='r2',
                                        cv=kfold,
                                        n_jobs=-1)
    grid.fit(xM, yV)
    return grid
def gen_WR_pilot_ch(self, pilot_SNRdB, alpha_l1r = 0, model = "Ridge"):
    """
    The reception process with pilot channel estimation
    is conducted.

    Generates BPSK pilot symbols, simulates their reception through the
    stored channel self.H_a at the given pilot SNR, fits a linear model
    mapping received pilots to transmitted symbols, stores the fitted
    coefficients as the receive weight matrix self.W_a, and decodes.

    :param pilot_SNRdB: pilot SNR in dB (converted to variance via db2var)
    :param alpha_l1r: regularization strength; for "ElasticNet" this must
        be a 2-sequence (alpha, l1_ratio) — TODO confirm callers pass a pair
    :param model: linear_model class name — "Ridge", "Lasso" or "ElasticNet"
    """
    Npilot = self.Npilot
    SNRpilot = db2var( pilot_SNRdB)
    # Random BPSK pilot block of Npilot symbols over self.Nt antennas.
    BPSK, s_a, x_flat_a, x_a = gen_BPSK( Npilot, self.Nt)
    # H_a = gen_H( self.Nr, self.Nt)
    # H_a = self.H_a
    # Simulated received pilots through the stored channel self.H_a.
    y_a = gen_Rx( self.Nr, Npilot, SNRpilot, self.H_a, x_a)
    yT_a = y_a.T
    # print( x_a.shape, yT_a.shape)
    # Now you can use either Ridge or Lasso methods.
    #lm = linear_model.Ridge( alpha)
    if model == "ElasticNet":
        # ElasticNet takes (alpha, l1_ratio) unpacked from alpha_l1r.
        lm = linear_model.ElasticNet( alpha_l1r[0], alpha_l1r[1])
    else:
        # Ridge/Lasso take a single scalar regularization strength.
        lm = getattr( linear_model, model)(alpha_l1r)
    # Regress transmitted symbols on received pilots; the fitted
    # coefficients become the receive weight matrix.
    lm.fit( yT_a, x_a)
    self.W_a = lm.coef_
    # print( "np.dot( W_a, H_a) =", np.dot( self.W_a, self.H_a))
    self.gen_Decoding()
def gen_WR_pilot_only(self, alpha_l1r = 0):
    """
    yT_a and x_a was prepared already.
    Now, W_a is calculated using alpha and then,
    decode data.
    For linear regression, alpha_l1r should not be specified except 0.

    :param alpha_l1r: 0 selects plain LinearRegression; otherwise the
        regularization strength for self.model — for "ElasticNet" it must
        be a 2-sequence (alpha, l1_ratio) — TODO confirm callers pass a pair
    """
    # Received pilots and transmitted symbols stored by an earlier step.
    yT_a = self.rx_p["yT_a"]
    x_a = self.rx_p["x_a"]
    # for alpha == 0, model is changed to linear regression.
    if alpha_l1r == 0:
        model = "LinearRegression"
    else:
        model = self.model
    if model == "LinearRegression":
        lm = linear_model.LinearRegression()
    elif model == "ElasticNet":
        lm = linear_model.ElasticNet( alpha_l1r[0], alpha_l1r[1])
    else: # This is either Ridge or Lasso
        lm = getattr( linear_model, model)(alpha_l1r)
    # Fitted coefficients become the receive weight matrix W_a.
    lm.fit( yT_a, x_a)
    self.W_a = lm.coef_
    # print( "np.dot( W_a, H_a) =", np.dot( self.W_a, self.H_a))
    self.gen_Decoding()
def gs_ElasticNet(xM, yV, en_params):
    """Fit and return a GridSearchCV over ElasticNet hyper-parameters.

    Uses r2 scoring and a shuffled 5-fold split generator.
    """
    print(xM.shape, yV.shape)
    splitter = model_selection.KFold(n_splits=5, shuffle=True)
    folds = splitter.split(xM)
    search = model_selection.GridSearchCV(linear_model.ElasticNet(),
                                          en_params,
                                          scoring='r2',
                                          cv=folds,
                                          n_jobs=-1)
    search.fit(xM, yV)
    return search
jgrid (james-90X3A's conflicted copy 2016-04-21).py source file
Project: jamespy_py3
Author: jskDr
Project source | File source | Views: 35 | Stars: 0 | Likes: 0 | Comments: 0
def gs_ElasticNet( xM, yV, en_params):
    """Grid-search ElasticNet using the legacy cross_validation/grid_search API."""
    print(xM.shape, yV.shape)
    folds = cross_validation.KFold(xM.shape[0], n_folds=5, shuffle=True)
    search = grid_search.GridSearchCV(linear_model.ElasticNet(),
                                      en_params,
                                      scoring='r2',
                                      cv=folds,
                                      n_jobs=-1)
    search.fit(xM, yV)
    return search
def gs_ElasticNet( xM, yV, en_params):
    """Return a fitted r2-scored ElasticNet grid search (legacy sklearn API)."""
    print(xM.shape, yV.shape)
    estimator = linear_model.ElasticNet()
    kfold = cross_validation.KFold(xM.shape[0], n_folds=5, shuffle=True)
    gs = grid_search.GridSearchCV(
        estimator, en_params, scoring='r2', cv=kfold, n_jobs=-1)
    gs.fit(xM, yV)
    return gs
def getModels():
    """Return the names of all supported regressor and classifier models.

    The first section lists regressors, the second classifiers.  Names are
    strings that must match scikit-learn estimator class names.
    """
    # Regressors.
    result = [
        "LinearRegression",
        "BayesianRidge",
        "ARDRegression",
        "ElasticNet",
        "HuberRegressor",
        "Lasso",
        "LassoLars",
        # BUG FIX: was misspelled "Rigid" — there is no such sklearn estimator.
        "Ridge",
        "SGDRegressor",
        "SVR",
    ]
    # Classifiers.
    result += [
        "MLPClassifier",
        "KNeighborsClassifier",
        "SVC",
        "GaussianProcessClassifier",
        "DecisionTreeClassifier",
        "RandomForestClassifier",
        "AdaBoostClassifier",
        "GaussianNB",
        "LogisticRegression",
        "QuadraticDiscriminantAnalysis",
    ]
    return result
def test_ElasticNet(*data):
    '''
    test for Elastic Net
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    net = linear_model.ElasticNet()
    net.fit(X_train, y_train)
    # Report the fitted parameters and out-of-sample quality.
    print('Coefficients:{0}, intercept {1}'.format(net.coef_, net.intercept_))
    residuals_sq = (net.predict(X_test) - y_test) ** 2
    print("Residual sum of squares: {0}".format(np.mean(residuals_sq)))
    print('Score: {0}'.format(net.score(X_test, y_test)))
def test_ElasticNet_alpha_rho(*data):
    '''
    test score with different alpha and l1_ratio
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 2)
    rhos = np.linspace(0.01, 1)
    scores = []
    for alpha in alphas:
        for rho in rhos:
            regr = linear_model.ElasticNet(alpha=alpha, l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    ## graph
    # BUG FIX: scores are collected alpha-major (n_alpha x n_rho) while
    # np.meshgrid returns (n_rho x n_alpha) grids; reshaping straight to
    # alphas.shape silently transposed the plotted surface (both axes happen
    # to have 50 points, so no error was raised).  Reshape, then transpose,
    # so scores[i, j] lines up with (alphas[i, j], rhos[i, j]).
    scores = np.array(scores).reshape(len(alphas), len(rhos)).T
    alphas, rhos = np.meshgrid(alphas, rhos)
    from mpl_toolkits.mplot3d import Axes3D  # this part works well in py3
    from matplotlib import cm
    fig = plt.figure()
    ax = Axes3D(fig)
    surf = ax.plot_surface(alphas, rhos, scores, rstride=1, cstride=1, cmap=cm.jet,
                           linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()
def test_elasticnet_convergence(self):
    """SGD with an elasticnet penalty should match coordinate descent."""
    n_samples, n_features = 1000, 5
    rng = np.random.RandomState(0)
    X = np.random.randn(n_samples, n_features)
    # Noise-free linear target: with weak regularization both solvers
    # should converge to (nearly) the same coefficients.
    ground_truth_coef = rng.randn(n_features)
    y = np.dot(X, ground_truth_coef)
    # XXX: alpha = 0.1 seems to cause convergence problems
    for alpha in [0.01, 0.001]:
        for l1_ratio in [0.5, 0.8, 1.0]:
            shared = dict(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=False)
            cd = linear_model.ElasticNet(**shared)
            cd.fit(X, y)
            sgd = self.factory(penalty='elasticnet', n_iter=50, **shared)
            sgd.fit(X, y)
            err_msg = ("cd and sgd did not converge to comparable "
                       "results for alpha=%f and l1_ratio=%f"
                       % (alpha, l1_ratio))
            assert_almost_equal(cd.coef_, sgd.coef_, decimal=2,
                                err_msg=err_msg)
def test_n_clusters():
    # Test that n_clusters param works properly
    X, y = make_blobs(n_samples=100, centers=10)
    by_int = Birch(n_clusters=10)
    by_int.fit(X)
    # More raw subclusters than the requested 10 final clusters.
    assert_greater(len(by_int.subcluster_centers_), 10)
    assert_equal(len(np.unique(by_int.labels_)), 10)
    # n_clusters given as an AgglomerativeClustering instance must
    # reproduce the integer result exactly.
    by_estimator = Birch(n_clusters=AgglomerativeClustering(n_clusters=10))
    by_estimator.fit(X)
    assert_array_equal(by_int.subcluster_labels_, by_estimator.subcluster_labels_)
    assert_array_equal(by_int.labels_, by_estimator.labels_)
    # A regressor is not a valid global clustering step.
    invalid = Birch(n_clusters=ElasticNet())
    assert_raises(ValueError, invalid.fit, X)
    # An over-large threshold produces too few subclusters and warns.
    too_coarse = Birch(threshold=10000.)
    assert_warns(UserWarning, too_coarse.fit, X)
def submit(self):
    """Retrain the ElasticNet on the full (outlier-clipped) training set and
    write per-month predictions for the test set to a timestamped CSV.
    """
    ## retrain with the whole training data
    # Drop logerror outliers outside the configured (low, up) band.
    self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
    # Re-center coordinates; the same offsets are applied to the test set
    # below so train/test stay consistent.
    self.TrainData['longitude'] -= -118600000
    self.TrainData['latitude'] -= 34220000
    X = self.TrainData.drop(self._l_drop_cols, axis=1)
    Y = self.TrainData['logerror']
    X = X.values.astype(np.float32, copy=False)
    en = ElasticNet(alpha= self._alpha, l1_ratio = self._ratio, max_iter= self._iter, tol= 1e-4, selection= self._sel, random_state= 2017)
    self._model = en.fit(X, Y)
    # Free the training data before loading the (large) test set.
    del self.TrainData, X, Y
    gc.collect()
    self.TestData = self._data.LoadFromHdfFile(self.InputDir, 'test')
    #self.TestData = self.TestData.sample(frac = 0.01)
    self._sub = pd.DataFrame(index=self.TestData.index)
    self._sub['ParcelId'] = self.TestData['parcelid']
    self.TestData['longitude'] -= -118600000
    self.TestData['latitude'] -= 34220000
    # Predict in chunks of N rows to bound memory usage.
    N = 200000
    start = time.time()
    for d in self._l_test_predict_columns:
        s0 = time.time()
        print('Prediction for column %s ' % d)
        # Time-dependent features get a per-month column suffix.
        l_test_columns = ['%s%s' % (c, d) if (c in ['lastgap', 'monthyear', 'buildingage']) else c for c in
                          self._l_train_columns]
        x_test = self.TestData[l_test_columns]
        for idx in range(0, len(x_test), N):
            x_test_block = x_test[idx:idx + N].values.astype(np.float32, copy=False)
            ret = self._model.predict(x_test_block)# * 0.99 + 0.011 * 0.01
            self._sub.loc[x_test[idx:idx + N].index, d] = ret
            print(np.mean(np.abs(ret)))
        e0 = time.time()
        print('Prediction for column %s is done. time elapsed %ds' % (d, (e0 - s0)))
    ## clean
    del self.TestData
    gc.collect()
    end = time.time()
    print('Prediction is done. time elapsed %ds' % (end - start))
    if (os.path.exists(self.OutputDir) == False):
        os.makedirs(self.OutputDir)
    self._sub.to_csv(
        '{0}/{1}_{2}.csv'.format(self.OutputDir, self.__class__.__name__, datetime.now().strftime('%Y%m%d-%H:%M:%S')),
        index=False, float_format='%.4f')
def build(self, dataset):
    """Fit candidate grid-searched models and keep the best one.

    For a categorizable target an SVC grid search is run; otherwise both
    an ElasticNet and an SVR grid search compete.  The winner (by
    ``best_score_``) is stored on ``self.model`` / ``self.model_score``.
    """
    evaluators = []
    cv = 5  # todo: have to adjust to dataset size
    if self.field_manager.target.is_categorizable():
        # Classification: grid-search linear and RBF SVC.
        parameter_candidates = [
            {"kernel": ["linear"], "C": [1, 10, 100]},
            {"kernel": ["rbf"], "gamma": [1e-1, 1e-2, 1e-3, 1e-4], "C": [1, 10, 100]}
        ]
        # todo: have to think about scoring parameter (default is accuracy, so f1 related score may be appropriate)
        evaluator = GridSearchCV(
            SVC(C=1),
            parameter_candidates,
            cv=cv
        )
        evaluators.append(evaluator)
    else:
        # Regression: ElasticNet over an alpha/l1_ratio grid ...
        evaluator1 = GridSearchCV(
            linear_model.ElasticNet(),
            {"alpha": [0.1, 0.5, 0.7, 1], "l1_ratio": [(r + 1) / 10 for r in range(10)]},
            cv=cv
        )
        # ... and an RBF SVR over gamma/C.
        parameter_candidates = [
            {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100]}
        ]
        # todo: have to think about scoring parameter (default is accuracy, so f1 related score may be appropriate)
        evaluator2 = GridSearchCV(
            SVR(C=1),
            parameter_candidates,
            cv=cv
        )
        evaluators.append(evaluator1)
        evaluators.append(evaluator2)
    # Keep the evaluator with the highest cross-validated score.
    self.model_score = 0
    self.model = None
    for e in evaluators:
        e.fit(dataset.data, dataset.target)
        if e.best_score_ > self.model_score:
            self.model_score = e.best_score_
            self.model = e.best_estimator_
def function(self):
    """Collect ElasticNet / ElasticNetCV hyper-parameters from the widgets.

    Returns a tuple ``(params, changed)`` where ``params`` maps parameter
    names to the current widget values and ``changed`` (via
    ``getChangedValues``) holds only the values that differ from sklearn's
    defaults.  The CV checkbox selects which estimator's parameters are
    gathered.
    """
    r_attrib = {'None': None}
    # TODO Add back the random state later.
    # try:
    #     r_state = int(self.randomStateLineEdit.text())
    # except:
    #     r_state = r_attrib[self.randomStateLineEdit.text()]
    if self.CVCheckBox.isChecked():
        params = {
            'l1_ratio': self.l1_ratioDoubleSpinBox.value(),
            'eps': self.epsDoubleSpinBox.value(),
            'n_alphas': self.n_alphasSpinBox.value(),
            # NOTE(review): anything other than the literal text 'None'
            # maps to None here too — confirm custom alpha lists are parsed
            # elsewhere.
            'alphas': {'None': None}.get(self.alphasLineEdit.text()),
            'fit_intercept': self.fit_interceptCheckBox.isChecked(),
            'normalize': self.normalizeCheckBox.isChecked(),
            'precompute': self.precomputeComboBox.currentText(),
            'max_iter': self.max_iterSpinBox.value(),
            # BUG FIX: 'tol' used to be read from max_iterSpinBox (copy-paste
            # error); read it from the tol spin box.
            'tol': self.tolDoubleSpinBox.value(),
            'cv': self.cVSpinBox.value(),
            'copy_X': self.copy_XCheckBox.isChecked(),
            'verbose': self.verboseCheckBox.isChecked(),
            'n_jobs': self.n_jobsSpinBox.value(),
            'positive': self.positiveCheckBox.isChecked(),
            'selection': self.selectionComboBox.currentText(),
            'CV': self.CVCheckBox.isChecked()}
        return params, self.getChangedValues(params, ElasticNetCV())
    else:
        params = {
            'alpha': self.alpha_text.value(),
            'l1_ratio': self.enl1_ratioDoubleSpinBox.value(),
            'fit_intercept': self.enfit_interceptCheckBox.isChecked(),
            'normalize': self.ennormalizeCheckBox.isChecked(),
            'precompute': self.enprecomputeCheckBox.isChecked(),
            'max_iter': self.enmax_iterSpinBox.value(),
            'copy_X': self.encopy_XCheckBox.isChecked(),
            'tol': self.entolDoubleSpinBox.value(),
            'warm_start': self.enwarm_startCheckBox.isChecked(),
            'positive': self.enpositiveCheckBox.isChecked(),
            'selection': self.selectionComboBox.currentText(),
            'CV': self.CVCheckBox.isChecked()}
        return params, self.getChangedValues(params, ElasticNet())