def refit_model(self):
    """Fit a fresh ridge surrogate on everything observed so far.

    Does nothing while ``self.known_models`` is empty. Otherwise the
    feature maps are rebuilt from the known models, each model is turned
    into a feature row, the rows are stacked into one CSR matrix, and a new
    ``lm.Ridge(self.lamb_ridge)`` is stored in ``self.surr_model`` and
    fitted against ``self.known_scores``.
    """
    # Nothing to learn from yet.
    if len(self.known_models) == 0:
        return

    self._build_feature_maps(self.known_models, self.ngram_maxlen, self.thres)

    feature_rows = [self._compute_features(mdl) for mdl in self.known_models]
    design = sp.vstack(feature_rows, "csr")
    targets = np.array(self.known_scores, dtype='float64')

    # The surrogate is replaced before fitting, exactly one model per refit.
    self.surr_model = lm.Ridge(self.lamb_ridge)
    self.surr_model.fit(design, targets)
    # NOTE: if the search space has holes this breaks; it needs a try/except.
# Example source code using the Python class Ridge()
def model_cross_valid(X,Y):
    """Print the mean 10-fold cross-validated score of baseline regressors.

    For each candidate class a default-constructed estimator is scored with
    ``neg_mean_squared_error`` and the class together with the mean score
    is printed.

    X -- feature matrix handed to ``cross_val_score``
    Y -- target values handed to ``cross_val_score``
    """
    seed = 7
    # random_state only has an effect when shuffling is enabled; recent
    # scikit-learn releases raise ValueError if it is set without shuffle.
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    scoring = 'neg_mean_squared_error'
    # Other candidates tried at some point: Ridge, Lasso, KNeighborsRegressor,
    # DecisionTreeRegressor, SVR, RandomForestRegressor, AdaBoostRegressor,
    # GradientBoostingRegressor.
    for model_name in [LinearRegression, ElasticNet]:
        model = model_name()
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name, results.mean())
def parameterChoosing(self):
    """Grid-search the Ridge ``alpha`` by 5-fold CV and report the results.

    Fits a ``GridSearchCV`` over ``np.logspace(-5, 5)`` on
    ``self.X_train`` / ``self.y_train``, prints the best parameters, the
    per-candidate scores and the scorer, then prints the mean squared error
    of the refitted search on ``self.X_test`` / ``self.y_test``.

    All output uses the call form of ``print`` with a single argument, which
    behaves the same on Python 2 and Python 3.
    """
    # Set the parameters by cross-validation
    tuned_parameters = [{'alpha': np.logspace(-5, 5)}]

    # NOTE(review): the 'mean_squared_error' scoring name and the
    # grid_scores_ attribute belong to older scikit-learn releases; confirm
    # the installed version before relying on them.
    reg = GridSearchCV(linear_model.Ridge(alpha = 0.5), tuned_parameters, cv=5, scoring='mean_squared_error')
    reg.fit(self.X_train, self.y_train)

    print("Best parameters set found on development set:\n")
    print(reg.best_params_)

    print("Grid scores on development set:\n")
    for params, mean_score, scores in reg.grid_scores_:
        print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

    print(reg.scorer_)

    print("MSE for test data set:")
    y_true, y_pred = self.y_test, reg.predict(self.X_test)
    print(mean_squared_error(y_pred, y_true))
def solveSingle(self,inputDF,outputDict,rho,beta_target):
    """Solve a proximal least-squares problem by reducing it to ridge regression.

    The substitution ``w = beta - beta_target`` is used: the targets are
    shifted by ``X . beta_target``, a ridge model without intercept and with
    ``alpha=rho`` is fitted for ``w``, and ``beta_target`` is added back.

    inputDF     -- mapping of sample id -> mapping of feature name -> value
    outputDict  -- mapping of sample id -> target value (looked up with .get)
    rho         -- ridge regularisation strength
    beta_target -- mapping of feature name -> target coefficient; features
                   missing from it count as 0

    Returns a dict mapping every feature name seen in ``inputDF`` to its
    fitted coefficient. Returns an empty dict when no feature value is
    present at all (no samples, or only empty samples).
    """
    rows, feature_names, values, targets = [], [], [], []
    fd = {}  # mapping feature names to consecutive integers, starting with 0
    for row, (sample_id, features) in enumerate(inputDF.items()):
        for name, value in features.items():
            rows.append(row)
            feature_names.append(name)
            values.append(value)
            # presumably registers `name` in fd with the next free index --
            # upd is defined elsewhere; verify against its definition
            upd(fd, name)
        targets.append(outputDict.get(sample_id))

    # Without a single feature value there is nothing to fit.
    if not rows:
        return {}

    # A real list is required: on Python 3 map() would hand an iterator to
    # coo_matrix.
    cols = [fd[name] for name in feature_names]
    # One row per sample, so that samples without features (including
    # trailing ones) still line up with the target vector.
    X = sparse.coo_matrix((values, (rows, cols)), shape=(len(targets), len(fd)))

    # ordered list of feature names according to their integer ids in fd
    fd_reverse = [k for k, v in sorted(fd.items(), key = lambda t: t[1])]

    # y_new = y - X . beta_target
    # converting a proximal least square problem to a ridge regression
    ZmUl = np.array([beta_target.get(k, 0) for k in fd_reverse])
    y_new = np.array(targets) - X * ZmUl

    ridge = Ridge(alpha = rho , fit_intercept=False)
    ret = ridge.fit(X, y_new)
    return dict(zip(fd_reverse, (ret.coef_ + ZmUl).tolist()))
def test_classes__property():
    """Check when a grid search exposes ``classes_`` and when it must not."""
    features = np.arange(100).reshape(10, 10)
    labels = np.array([0] * 5 + [1] * 5)
    c_values = [.1, 1, 10]

    # A fitted classifier search mirrors best_estimator_.classes_.
    fitted_clf = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': c_values})
    fitted_clf.fit(features, labels)
    assert_array_equal(fitted_clf.best_estimator_.classes_,
                       fitted_clf.classes_)

    # A regressor search never has classes_.
    fitted_reg = dcv.GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
    fitted_reg.fit(features, labels)
    assert not hasattr(fitted_reg, 'classes_')

    # An unfitted search has no classes_ yet.
    unfitted_clf = dcv.GridSearchCV(LinearSVC(random_state=0), {'C': c_values})
    assert not hasattr(unfitted_clf, 'classes_')

    # Without a refit there is no best estimator to take classes_ from.
    no_refit_clf = dcv.GridSearchCV(LinearSVC(random_state=0),
                                    {'C': c_values}, refit=False)
    no_refit_clf.fit(features, labels)
    assert not hasattr(no_refit_clf, 'classes_')
def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
    Fit a linear regressor on (X, y) and estimate its cross-validated RMSE.

    X (pandas DataFrame): The data.
    y (pandas DataFrame or Series): The answers.
    regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
    regularization_const: regularization strength for Ridge or Lasso, passed
        to the estimator as ``alpha``. Ignored for LinearRegression.

    Any class other than LinearRegression is constructed with
    ``alpha=regularization_const, normalize=True``.

    Return:
        tuple, (the_fitted_regressor, mean of the per-fold root mean squared
        errors).
    '''
    if regression_class is not LinearRegression:
        predictor = regression_class(alpha=regularization_const, normalize=True)
    else:
        predictor = regression_class()
    predictor.fit(X, y)

    neg_mse_per_fold = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    # The scorer reports negated MSE; flip the sign and take the root.
    rmse_per_fold = np.sqrt(-1 * neg_mse_per_fold)
    return (predictor, np.mean(rmse_per_fold))
def train_ridge_linear_model(_train_x, train_y, _predict_x,
                             sample_weight=None):
    """Pick a ridge alpha with RidgeCV, refit a plain Ridge and predict.

    _train_x, _predict_x -- raw feature sets; both go through
                            ``standarize_feature`` before use
    train_y              -- training targets
    sample_weight        -- accepted but currently not passed to any fit call

    Returns a dict with the predictions for the prediction set ("y"), the
    predictions for the training set ("train_y") and the fitted coefficients
    ("coef").
    """
    print_title("Ridge Regressor")
    train_x, predict_x = standarize_feature(_train_x, _predict_x)

    # Alpha selection with RidgeCV's default cross-validation.
    alphas = [0.1, 1, 10, 100, 1e3, 1e4, 2e4, 5e4, 8e4, 1e5, 1e6, 1e7, 1e8]
    selector = linear_model.RidgeCV(alphas=alphas, store_cv_values=True)
    selector.fit(train_x, train_y)
    cv_mse = np.mean(selector.cv_values_, axis=0)
    print("alphas: %s" % alphas)
    print("CV MSE: %s" % cv_mse)
    print("Best alpha using built-in RidgeCV: %f" % selector.alpha_)

    # Generate the predictions from a Ridge model using the selected alpha.
    best_alpha = selector.alpha_
    final_model = linear_model.Ridge(alpha=best_alpha)
    final_model.fit(train_x, train_y)
    predict_y = final_model.predict(predict_x)
    train_y_pred = final_model.predict(train_x)

    return {"y": predict_y, "train_y": train_y_pred, "coef": final_model.coef_}
def test_clusterer_enforcement(self):
    """
    Assert that the cluster visualizer rejects non-clustering estimators
    and accepts clustering ones.
    """
    rejected = [
        SVC, SVR, Ridge, RidgeCV, LinearRegression, RandomForestClassifier
    ]
    accepted = [
        KMeans, MiniBatchKMeans, AffinityPropagation, MeanShift, DBSCAN, Birch
    ]

    for estimator_cls in rejected:
        with self.assertRaises(YellowbrickTypeError):
            ClusteringScoreVisualizer(estimator_cls())

    for estimator_cls in accepted:
        try:
            ClusteringScoreVisualizer(estimator_cls())
        except YellowbrickTypeError:
            self.fail("could not pass clustering estimator to visualizer")
def residual_smooth(trajectory, reg_alpha, back_horizon):
    """Fit ridge coefficients that explain each value from its lagged values.

    For every (start, end) pair in the module-level ``inPlay`` sequence, the
    segment ``trajectory[start:end + 1]`` is shifted by 1..back_horizon
    steps, with the wrapped-around leading entries zeroed, to form the lag
    columns. The first ``2 * back_horizon`` rows of every segment are then
    dropped from both targets and lag columns before fitting.

    trajectory   -- array of values; a copy is used, the input is not modified
    reg_alpha    -- ridge regularisation strength
    back_horizon -- number of lag columns

    Returns the fitted ``coef_`` of the ridge model.
    """
    ridge = linear_model.Ridge(alpha = reg_alpha)
    # Lag matrix; only rows inside inPlay segments get written below.
    lagged = np.empty(shape = [trajectory.shape[0], back_horizon])
    residual = trajectory.copy()

    for segment in inPlay:
        start, stop = segment[0], segment[1] + 1
        for lag in range(1, back_horizon + 1):
            shifted = np.roll(residual[start:stop], lag)
            # Entries rolled in from the end of the segment carry no history.
            for pos in range(lag):
                shifted[pos] = 0
            lagged[start:stop, lag - 1] = shifted

    rows_to_delete = [segment[0] + offset
                      for segment in inPlay
                      for offset in range(2 * back_horizon)]
    residual = np.delete(residual, rows_to_delete, 0)
    lagged = np.delete(lagged, rows_to_delete, 0)

    # Regularised least squares of the values on their own lagged copies.
    ridge.fit(lagged, residual)
    return ridge.coef_
def __init__(self,
             probabilistic_estimator,
             stepsize=0.01,
             verbose=0,
             fit_intercept=False,
             sparse_output=True,
             **ridge_params
             ):
    """
    Store the wrapped estimator and build the Ridge regressor.

    Arguments:
    probabilistic_estimator -- Estimator capable of predict_proba

    Keyword Arguments:
    stepsize -- stepsize for the exhaustive search of optimal threshold
    verbose -- verbosity setting, stored as-is
    fit_intercept -- fit intercept in Ridge regression
    sparse_output -- Predict returns csr in favor of ndarray
    **ridge_params -- Passed down to Ridge regression
    """
    self.model, self.verbose = probabilistic_estimator, verbose
    # fit_intercept is the only Ridge option with its own keyword here;
    # everything else is forwarded untouched.
    self.ridge = Ridge(fit_intercept=fit_intercept, **ridge_params)
    self.stepsize, self.sparse_output = stepsize, sparse_output
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    Cross-validated SVR predictions for every input molecule.

    xM, yV     -- inputs and targets handed to cross_val_predict
    svr_params -- keyword arguments forwarded to svm.SVR
    n_splits   -- number of K-fold splits
    n_jobs     -- forwarded to cross_val_predict
    grid_std   -- forwarded to jutil.cv_show
    graph      -- when true, show the result through jutil.cv_show
    shuffle    -- whether the K-fold splitter shuffles the samples

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR( **svr_params)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    # Split with the splitter configured above so that n_splits and shuffle
    # actually take effect.
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
def _cv_r0( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    Cross-validated predictions for every input molecule.

    method   -- name of an estimator in linear_model, e.g. 'Ridge' or 'Lasso'
    xM, yV   -- inputs and targets handed to cross_val_predict
    alpha    -- regularisation strength passed to the estimator
    n_splits -- number of shuffled K-fold splits
    n_jobs   -- forwarded to cross_val_predict
    grid_std -- forwarded to jutil.cv_show
    graph    -- when true, show the result through jutil.cv_show

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
    # Split with the splitter configured above so that n_splits takes effect.
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
def cvLOO( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
    """
    Leave-one-out cross-validated predictions for every input molecule.

    method   -- name of an estimator in linear_model, e.g. 'Ridge' or 'Lasso'
    xM, yV   -- inputs and targets handed to cross_val_predict
    alpha    -- regularisation strength passed to the estimator
    n_jobs   -- forwarded to cross_val_predict
    grid_std -- forwarded to jutil.cv_show
    graph    -- when true, show the result through jutil.cv_show

    Returns the cross-validated predictions.
    """
    # One split per sample turns K-fold into leave-one-out.
    n_splits = xM.shape[0]

    clf = getattr( linear_model, method)( alpha = alpha)
    # model_selection.KFold takes only the number of splits; also passing
    # the sample count positionally would supply n_splits twice.
    kf_n = model_selection.KFold( n_splits=n_splits)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
def mlr_val_vseq_ridge( RM, yE, v_seq, alpha = .5, disp = True, graph = True):
    """
    Fit ridge regression on the rows not listed in v_seq and validate on
    the rows listed in v_seq.

    RM    -- feature matrix, indexed by row
    yE    -- two-dimensional targets; column 0 is used
    v_seq -- row indices held out for validation
    alpha -- ridge regularisation strength
    disp  -- print section titles and forward to mlr_show
    graph -- forwarded to mlr_show

    Returns (r_sqr, RMSE) as reported by mlr_show for the validation rows.
    """
    # Hash the held-out indices once so each membership test is constant
    # time instead of a scan over v_seq.
    held_out = set( v_seq)
    t_seq = [x for x in range( len( yE)) if x not in held_out]

    RMt, yEt = RM[ t_seq, :], yE[ t_seq, 0]
    RMv, yEv = RM[ v_seq, :], yE[ v_seq, 0]

    clf = linear_model.Ridge( alpha = alpha)
    clf.fit( RMt, yEt)

    if disp: print('Training result')
    mlr_show( clf, RMt, yEt, disp = disp, graph = graph)

    if disp: print('Validation result')
    r_sqr, RMSE = mlr_show( clf, RMv, yEv, disp = disp, graph = graph)

    return r_sqr, RMSE
def cv_SVR( xM, yV, svr_params, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    Cross-validated SVR predictions for every input molecule.

    xM, yV     -- inputs and targets handed to cross_val_predict
    svr_params -- keyword arguments forwarded to svm.SVR
    n_splits   -- number of K-fold splits
    n_jobs     -- forwarded to cross_val_predict
    grid_std   -- forwarded to kutil.cv_show
    graph      -- when true, show the result through kutil.cv_show
    shuffle    -- whether the K-fold splitter shuffles the samples

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = svm.SVR( **svr_params)
    kf_n_c = model_selection.KFold( n_splits=n_splits, shuffle=shuffle)
    # Split with the splitter configured above so that n_splits and shuffle
    # actually take effect.
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
def _cv_r0( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    Cross-validated predictions for every input molecule.

    method   -- name of an estimator in linear_model, e.g. 'Ridge' or 'Lasso'
    xM, yV   -- inputs and targets handed to cross_val_predict
    alpha    -- regularisation strength passed to the estimator
    n_splits -- number of shuffled K-fold splits
    n_jobs   -- forwarded to cross_val_predict
    grid_std -- forwarded to kutil.cv_show
    graph    -- when true, show the result through kutil.cv_show

    Returns the cross-validated predictions.
    """
    print(xM.shape, yV.shape)

    clf = getattr( linear_model, method)( alpha = alpha)
    kf_n_c = model_selection.KFold( n_splits = n_splits, shuffle=True)
    # Split with the splitter configured above so that n_splits takes effect.
    kf_n = kf_n_c.split( xM)
    yV_pred = model_selection.cross_val_predict( clf, xM, yV, cv = kf_n, n_jobs = n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        kutil.cv_show( yV, yV_pred, grid_std = grid_std)

    return yV_pred
def cv( method, xM, yV, alpha, n_splits = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    Produce cross-validated predictions for all input molecules.

    method names an estimator class in linear_model ('Ridge', 'Lasso');
    it is built with the given alpha and evaluated over a K-fold split
    configured by n_splits and shuffle. With graph enabled the result is
    also shown through kutil.cv_show.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    regressor = estimator_cls(alpha=alpha)
    splitter = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)
    yV_pred = model_selection.cross_val_predict(
        regressor, xM, yV, cv=splitter.split(xM), n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    kutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def mlr_val_vseq_ridge( RM, yE, v_seq, alpha = .5, disp = True, graph = True):
    """
    Fit ridge regression on the rows not listed in v_seq and validate on
    the rows listed in v_seq.

    RM    -- feature matrix, indexed by row
    yE    -- two-dimensional targets; column 0 is used
    v_seq -- row indices held out for validation
    alpha -- ridge regularisation strength
    disp  -- print section titles and forward to mlr_show
    graph -- forwarded to mlr_show

    Returns (r_sqr, RMSE) as reported by mlr_show for the validation rows.
    """
    # Hash the held-out indices once so each membership test is constant
    # time instead of a scan over v_seq.
    held_out = set( v_seq)
    t_seq = [x for x in range( len( yE)) if x not in held_out]

    RMt, yEt = RM[ t_seq, :], yE[ t_seq, 0]
    RMv, yEv = RM[ v_seq, :], yE[ v_seq, 0]

    clf = linear_model.Ridge( alpha = alpha)
    clf.fit( RMt, yEt)

    if disp: print('Training result')
    mlr_show( clf, RMt, yEt, disp = disp, graph = graph)

    if disp: print('Validation result')
    r_sqr, RMSE = mlr_show( clf, RMv, yEv, disp = disp, graph = graph)

    return r_sqr, RMSE
def predict( self, new_smiles, mode = {'tool': 'sklearn', 'type': 'ridge', 'alpha': 0.5}):
    """
    Predict target values for new SMILES codes.

    A ridge model with ``mode['alpha']`` is fitted on ``self.xM`` /
    ``self.yV`` on every call and applied to the fingerprints that
    ``jchem.gfpM`` returns for ``new_smiles``. Only ``mode['type']`` equal
    to 'ridge' (case-insensitive) is supported; anything else raises
    TypeError.
    """
    requested = mode['type'].lower()
    if requested != 'ridge':
        raise TypeError('The requested mode is not supported yet.')

    regressor = linear_model.Ridge(alpha=mode['alpha'])
    # Learn the weight vector from the stored training data.
    regressor.fit(self.xM, self.yV)

    # Featurise the new molecules and predict.
    new_xM = jchem.gfpM(new_smiles)
    return regressor.predict(new_xM)
def predict( self, new_smiles, mode = {'tool': 'sklearn', 'type': 'ridge', 'alpha': 0.5}):
    """
    Predict target values for new SMILES codes.

    A ridge model with ``mode['alpha']`` is fitted on ``self.xM`` /
    ``self.yV`` on every call and applied to the fingerprints that
    ``jchem.gfpM`` returns for ``new_smiles``. Only ``mode['type']`` equal
    to 'ridge' (case-insensitive) is supported; anything else raises
    TypeError.
    """
    requested = mode['type'].lower()
    if requested != 'ridge':
        raise TypeError('The requested mode is not supported yet.')

    regressor = linear_model.Ridge(alpha=mode['alpha'])
    # Learn the weight vector from the stored training data.
    regressor.fit(self.xM, self.yV)

    # Featurise the new molecules and predict.
    new_xM = jchem.gfpM(new_smiles)
    return regressor.predict(new_xM)
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Grid-search the Ridge alpha over a log-spaced range with shuffled K-fold.

    Parameters
    -------------
    alphas_log: arguments unpacked into np.logspace to build the alpha grid
    n_folds: number of K-fold splits
    n_jobs: forwarded to GridSearchCV
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2

    Return
    --------
    the fitted GridSearchCV object
    """
    print('If scoring is not r2 but error metric, output score is revered for scoring!')
    print(xM.shape, yV.shape)

    estimator = linear_model.Ridge()
    # e.g. the default (1, -1, 9) gives np.logspace(1, -1, 9)
    alpha_grid = {'alpha': np.logspace(*alphas_log)}
    splitter = model_selection.KFold(n_splits=n_folds, shuffle=True)
    folds = splitter.split(xM)

    gs = model_selection.GridSearchCV(
        estimator, alpha_grid, scoring=scoring, cv=folds, n_jobs=n_jobs)
    gs.fit(xM, yV)
    return gs
def cv(method, xM, yV, alpha, n_folds=5, n_jobs=-1, grid_std=None, graph=True, shuffle=True):
    """
    Cross-validated predictions for every input molecule.

    Parameters
    -------------
    method: name of an estimator in linear_model, e.g. 'Ridge' or 'Lasso'
    xM, yV: inputs and targets handed to cross_val_predict
    alpha: regularisation strength passed to the estimator
    n_folds: number of K-fold splits
    n_jobs: forwarded to cross_val_predict
    grid_std: forwarded to jutil.cv_show
    graph: when true, show the result through jutil.cv_show
    shuffle: whether the K-fold splitter shuffles the samples

    Return
    --------
    yV_pred
    """
    print(xM.shape, yV.shape)

    clf = getattr(linear_model, method)(alpha=alpha)
    # Honour the caller's shuffle choice; the default (True) keeps the
    # shuffled behaviour.
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)
    kf_n = kf_n_c.split(xM)
    yV_pred = model_selection.cross_val_predict(
        clf, xM, yV, cv=kf_n, n_jobs=n_jobs)

    if graph:
        print('The prediction output using cross-validation is given by:')
        jutil.cv_show(yV, yV_pred, grid_std=grid_std)

    return yV_pred
def _cv_LOO_r0(method, xM, yV, alpha, n_jobs=-1, grid_std=None, graph=True):
    """
    Produce leave-one-out cross-validated predictions for all input molecules.

    method names an estimator class in linear_model ('Ridge', 'Lasso'),
    built with the given alpha. The K-fold splitter uses one split per row
    of xM and no shuffling.
    """
    sample_count = xM.shape[0]
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    regressor = estimator_cls(alpha=alpha)
    # One split per sample; shuffling would be pointless for leave-one-out.
    splitter = model_selection.KFold(n_splits=sample_count)
    yV_pred = model_selection.cross_val_predict(
        regressor, xM, yV, cv=splitter.split(xM), n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
# Source file: jgrid (james-90X3A's conflicted copy 2016-04-21).py
# Project: jamespy_py3
# Author: jskDr
# Project source
# File source
# Views: 27
# Favorites: 0
# Likes: 0
# Comments: 0
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True):
    """
    Produce cross-validated predictions for all input molecules.

    method names an estimator class in linear_model ('Ridge', 'Lasso'),
    built with the given alpha and evaluated over a shuffled K-fold split
    from the cross_validation module. With graph enabled the result is also
    shown through jutil.cv_show.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    regressor = estimator_cls(alpha=alpha)
    # This KFold signature takes the sample count as its first argument.
    folds = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)
    yV_pred = cross_validation.cross_val_predict(
        regressor, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def gs_Ridge( xM, yV, alphas_log = (1, -1, 9), n_folds = 5, n_jobs = -1, scoring = 'r2'):
    """
    Grid-search the Ridge alpha over a log-spaced range with shuffled K-fold.

    Parameters
    -------------
    alphas_log: arguments unpacked into np.logspace to build the alpha grid
    n_folds: number of K-fold splits
    n_jobs: forwarded to GridSearchCV
    scoring: mean_absolute_error, mean_squared_error, median_absolute_error, r2

    Return
    --------
    the fitted GridSearchCV object
    """
    print(xM.shape, yV.shape)

    estimator = linear_model.Ridge()
    # e.g. the default (1, -1, 9) gives np.logspace(1, -1, 9)
    alpha_grid = {'alpha': np.logspace(*alphas_log)}
    # This KFold signature takes the sample count as its first argument.
    folds = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=True)

    gs = grid_search.GridSearchCV(
        estimator, alpha_grid, scoring=scoring, cv=folds, n_jobs=n_jobs)
    gs.fit(xM, yV)
    return gs
def cv( method, xM, yV, alpha, n_folds = 5, n_jobs = -1, grid_std = None, graph = True, shuffle = True):
    """
    Produce cross-validated predictions for all input molecules.

    method names an estimator class in linear_model ('Ridge', 'Lasso'),
    built with the given alpha and evaluated over a K-fold split from the
    cross_validation module, configured by n_folds and shuffle. With graph
    enabled the result is also shown through jutil.cv_show.
    """
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    regressor = estimator_cls(alpha=alpha)
    # This KFold signature takes the sample count as its first argument.
    folds = cross_validation.KFold(xM.shape[0], n_folds=n_folds, shuffle=shuffle)
    yV_pred = cross_validation.cross_val_predict(
        regressor, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def _cv_LOO_r0( method, xM, yV, alpha, n_jobs = -1, grid_std = None, graph = True):
    """
    Produce leave-one-out cross-validated predictions for all input molecules.

    method names an estimator class in linear_model ('Ridge', 'Lasso'),
    built with the given alpha. The cross_validation K-fold uses as many
    folds as xM has rows and no shuffling.
    """
    sample_count = xM.shape[0]
    print(xM.shape, yV.shape)

    estimator_cls = getattr(linear_model, method)
    regressor = estimator_cls(alpha=alpha)
    # As many folds as samples: every fold holds out exactly one row.
    folds = cross_validation.KFold(xM.shape[0], n_folds=sample_count)
    yV_pred = cross_validation.cross_val_predict(
        regressor, xM, yV, cv=folds, n_jobs=n_jobs)

    if not graph:
        return yV_pred

    print('The prediction output using cross-validation is given by:')
    jutil.cv_show(yV, yV_pred, grid_std=grid_std)
    return yV_pred
def test_cross_val_score_with_score_func_regression():
    """Check Ridge cross_val_score under default, r2, MSE and explained-variance scoring."""
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()
    expected_r2 = [0.94, 0.97, 0.97, 0.99, 0.92]

    # The estimator's own default score.
    default_scores = cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_r2, 2)

    # Explicit R2 (determination coefficient) must match the default score.
    r2_scores = cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, expected_r2, 2)

    # Mean squared error is a loss, so the reported "scores" are negative.
    mse_scores = cross_val_score(reg, X, y, cv=5, scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance through a scorer built with make_scorer.
    ev_scorer = make_scorer(explained_variance_score)
    ev_scores = cross_val_score(reg, X, y, cv=5, scoring=ev_scorer)
    assert_array_almost_equal(ev_scores, expected_r2, 2)
def test_cross_val_score_with_score_func_regression():
    """Check Ridge cval.cross_val_score under default, r2, MSE and explained-variance scoring."""
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()
    expected_r2 = [0.94, 0.97, 0.97, 0.99, 0.92]

    # The estimator's own default score.
    default_scores = cval.cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(default_scores, expected_r2, 2)

    # Explicit R2 (determination coefficient) must match the default score.
    r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, expected_r2, 2)

    # Mean squared error is a loss, so the reported "scores" are negative.
    mse_scores = cval.cross_val_score(reg, X, y, cv=5,
                                      scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance through a scorer built with make_scorer.
    ev_scorer = make_scorer(explained_variance_score)
    ev_scores = cval.cross_val_score(reg, X, y, cv=5, scoring=ev_scorer)
    assert_array_almost_equal(ev_scores, expected_r2, 2)
def __init__(self, info, verbose=True, debug_mode=False):
    """Choose and configure a model from the dataset description.

    info       -- dict read for 'label_num', 'target_num', 'task' and
                  'metric'; 'is_sparse' and 'has_categorical' are consulted
                  where the branch needs them
    verbose    -- forwarded to the chosen scikit-learn estimator
    debug_mode -- values >= 2 select a RandomPredictor and skip everything else

    Sets self.name, self.model and self.predict_method. Regression tasks
    predict with ``model.predict``; all other tasks predict with
    ``model.predict_proba``.
    """
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)

    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return

    if info['task'] == 'regression':
        if info['is_sparse'] == True:
            self.name = "BaggingRidgeRegressor"
            # unfortunately, no warm start...
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1, max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
        # Regressors output values directly, not probabilities.
        self.predict_method = self.model.predict
    else:
        if info['has_categorical']:
            # Out of laziness, categorical variables are not converted...
            self.name = "RandomForestClassifier"
            # unfortunately, no warm start...
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            # unfortunately, no warm start...
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)
        else:
            self.name = "GradientBoostingClassifier"
            # Direct constructor call; building the call as a string for
            # eval() gains nothing here.
            self.model = GradientBoostingClassifier(n_estimators=1, verbose=verbose, random_state=1, warm_start=True)
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        self.predict_method = self.model.predict_proba