def gs_Ridge_BIKE(A_list, yV, XX=None, alphas_log=(1, -1, 9), n_splits=5, n_jobs=-1):
    """
    A_list is a list of A matrices, where each A is a similarity matrix.
    XX is a matrix of concatenated linear descriptors.
    If no descriptors are used, XX can be None.
    """
    clf = binary_model.BIKE_Ridge(A_list, XX)
    params = {'alpha': np.logspace(*alphas_log)}
    ln = A_list[0].shape[0]  # ln is the number of molecules.
    kf_n_c = model_selection.KFold(n_splits=n_splits, shuffle=True)
    gs = model_selection.GridSearchCV(clf, params, scoring='r2', cv=kf_n_c, n_jobs=n_jobs)
    # BIKE_Ridge looks kernel rows/columns up by index, so the "X" passed to
    # fit is just a column vector of sample indices.
    AX_idx = np.array([list(range(ln))]).T
    gs.fit(AX_idx, yV)
    return gs
def gs_Lasso(xM, yV, alphas_log=(-1, 1, 9), n_folds=5, n_jobs=-1):
    print(xM.shape, yV.shape)
    clf = linear_model.Lasso()
    params = {'alpha': np.logspace(*alphas_log)}
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(
        clf, params, scoring='r2', cv=kf5, n_jobs=n_jobs)
    gs.fit(xM, yV)
    return gs
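# A minimal usage sketch for gs_Lasso on synthetic data (not from the original
# project). It assumes gs_Lasso is in scope; the imports mirror the names the
# snippets above rely on (np, linear_model, model_selection).
import numpy as np
from sklearn import linear_model, model_selection

rng = np.random.RandomState(0)
xM = rng.randn(100, 10)                       # 100 samples, 10 descriptors
yV = 2.0 * xM[:, 0] + 0.1 * rng.randn(100)    # target driven by one feature

gs = gs_Lasso(xM, yV, alphas_log=(-3, 1, 9))
print(gs.best_params_, gs.best_score_)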
def _gs_SVC_r0(xM, yVc, params):
    """
    Since classification is considered, we use yVc, which holds discrete
    class labels, whereas yV may hold floating-point values.
    """
    print(xM.shape, yVc.shape)
    clf = svm.SVC()
    kf5_c = model_selection.KFold(n_splits=5, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(clf, params, cv=kf5, n_jobs=-1)
    gs.fit(xM, yVc)
    return gs
def gs_SVC(xM, yVc, params, n_folds=5):
    """
    Since classification is considered, we use yVc, which holds discrete
    class labels, whereas yV may hold floating-point values.
    """
    print(xM.shape, yVc.shape)
    clf = svm.SVC()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf5 = kf5_c.split(xM)
    gs = model_selection.GridSearchCV(clf, params, cv=kf5, n_jobs=-1)
    gs.fit(xM, yVc)
    return gs
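# A hedged usage sketch for gs_SVC; the C/gamma grid below is illustrative,
# not from the original project.
from sklearn import datasets

iris = datasets.load_iris()
params = {'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1.0]}
gs = gs_SVC(iris.data, iris.target, params, n_folds=5)
print(gs.best_params_)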
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Parameters
    ----------
    scoring: 'r2', or an error metric such as mean_absolute_error,
        mean_squared_error, or median_absolute_error
    """
    print('If scoring is an error metric rather than r2, the reported score is sign-reversed so that greater is still better.')
    print(xM.shape, yV.shape)
    clf = linear_model.Ridge()
    params = {'alpha': np.logspace(*alphas_log)}
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(xM)
    gs = model_selection.GridSearchCV(
        clf, params, scoring=scoring, cv=kf_n, n_jobs=n_jobs)
    gs.fit(xM, yV)
    return gs
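# For context on the sign-reversal note above: scikit-learn exposes error
# metrics as negated scorers so GridSearchCV can always maximize. A minimal
# sketch using the standard sklearn API on synthetic data:
import numpy as np
from sklearn import linear_model, model_selection

rng = np.random.RandomState(0)
X = rng.randn(50, 4)
y = X @ np.array([1.0, -2.0, 0.5, 0.0]) + 0.1 * rng.randn(50)

gs = model_selection.GridSearchCV(linear_model.Ridge(),
                                  {'alpha': np.logspace(-2, 2, 5)},
                                  scoring='neg_mean_absolute_error', cv=5)
gs.fit(X, y)
print(gs.best_score_)  # negative: the negated MAE of the best alpha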
def gs_Ridge_BIKE(A_list, yV, XX=None, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1):
    """
    A_list is a list of A matrices, where each A is a similarity matrix.
    XX is a matrix of concatenated linear descriptors.
    If no descriptors are used, XX can be None.
    """
    clf = binary_model.BIKE_Ridge(A_list, XX)
    params = {'alpha': np.logspace(*alphas_log)}
    ln = A_list[0].shape[0]  # ln is the number of molecules.
    # Split over the molecule indices, not over the list of kernel matrices.
    AX_idx = np.array([list(range(ln))]).T
    kf_n_c = model_selection.KFold(n_splits=n_folds, shuffle=True)
    kf_n = kf_n_c.split(AX_idx)
    gs = model_selection.GridSearchCV(
        clf, params, scoring='r2', cv=kf_n, n_jobs=n_jobs)
    gs.fit(AX_idx, yV)
    return gs
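# The index-column trick above is only needed because BIKE_Ridge combines
# several similarity matrices plus descriptors. For a single precomputed
# kernel, scikit-learn's CV utilities slice the kernel on both axes
# themselves; a minimal single-kernel sketch (standard sklearn, synthetic
# data) for comparison:
import numpy as np
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV, KFold

rng = np.random.RandomState(0)
X = rng.randn(30, 5)
y = rng.randn(30)
K = X @ X.T  # precomputed linear kernel (n_samples x n_samples)

gs = GridSearchCV(KernelRidge(kernel='precomputed'),
                  {'alpha': np.logspace(-2, 2, 5)},
                  scoring='r2', cv=KFold(n_splits=5, shuffle=True))
gs.fit(K, y)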
# Source: train_novelty_detection.py, project keras-transfer-learning-for-oxford102, author Arsey
def train_logistic():
    df = pd.read_csv(config.activations_path)
    df, y, classes = encode(df)
    X_train, X_test, y_train, y_test = train_test_split(
        df.values, y, test_size=0.2, random_state=17)
    params = {'C': [10, 2, .9, .4, .1], 'tol': [0.0001, 0.001, 0.0005]}
    log_reg = LogisticRegression(solver='lbfgs', multi_class='multinomial', class_weight='balanced')
    clf = GridSearchCV(log_reg, params, scoring='neg_log_loss', refit=True, cv=3, n_jobs=-1)
    clf.fit(X_train, y_train)
    print("best params: " + str(clf.best_params_))
    print("Accuracy: ", accuracy_score(y_test, clf.predict(X_test)))
    setattr(clf, '__classes', classes)
    # Save the fitted search, with its class labels attached, for later use.
    joblib.dump(clf, config.get_novelty_detection_model_path())
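# A hedged sketch of how the saved model would be reloaded later, assuming the
# same config module; getattr recovers the class labels attached above.
# X_new is a hypothetical matrix of activations to score.
clf = joblib.load(config.get_novelty_detection_model_path())
classes = getattr(clf, '__classes')
probabilities = clf.predict_proba(X_new)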
def perform():
    # Create a new grid-search classifier from a scikit-learn pipeline
    model = GridSearchCV(pipeline(), gs_clf_params(), n_jobs=-1)
    # Get your training and testing sets of data with a 50/50 split
    (train_data, train_targets), (test_data, test_targets) = dp.get_data()
    # Train your model
    model = model.fit(train_data, train_targets)
    # Test its accuracy
    predictions = model.predict(test_data)
    # Display the model's accuracy
    print("\nModel Accuracy: {}\n".format(np.mean(predictions == test_targets)))
    # Save the trained model to disk
    save_model(model)
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)
    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
def test_grid_search():
    # Test that the best estimator contains the right value for foo_param
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, verbose=3)
    # make sure it selects the smallest parameter in case of ties
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    grid_search.fit(X, y)
    sys.stdout = old_stdout
    assert_equal(grid_search.best_estimator_.foo_param, 2)
    for i, foo_i in enumerate([1, 2, 3]):
        assert_true(grid_search.grid_scores_[i][0]
                    == {'foo_param': foo_i})
    # Smoke test the score etc:
    grid_search.score(X, y)
    grid_search.predict_proba(X)
    grid_search.decision_function(X)
    grid_search.transform(X)
    # Test exception handling on scoring
    grid_search.scoring = 'sklearn'
    assert_raises(ValueError, grid_search.fit, X, y)
def test_grid_search_labels():
    # Check if ValueError (when labels is None) propagates to GridSearchCV
    # And also check if labels is correctly passed to the cv object
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    labels = rng.randint(0, 3, 15)
    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}
    label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
                 LabelShuffleSplit()]
    for cv in label_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             gs.fit, X, y)
        gs.fit(X, y, labels)
    non_label_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_label_cvs:
        gs = GridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
def test_grid_search_sparse():
    # Test that grid search works with both dense and sparse matrices
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C
    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180].tocoo(), y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C
    assert_true(np.mean(y_pred == y_pred2) >= .9)
    assert_equal(C, C2)
def test_pandas_input():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((DataFrame, Series))
    except ImportError:
        pass
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    for InputFeatureType, TargetType in types:
        # X dataframe, y series
        X_df, y_ser = InputFeatureType(X), TargetType(y)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]})
        grid_search.fit(X_df, y_ser).score(X_df, y_ser)
        grid_search.predict(X_df)
        assert_true(hasattr(grid_search, "grid_scores_"))
def test_ridgecv_sample_weight():
    rng = np.random.RandomState(0)
    alphas = (0.1, 1.0, 10.0)
    # There are different algorithms for n_samples > n_features
    # and the opposite, so test them both.
    for n_samples, n_features in ((6, 5), (5, 10)):
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)
        cv = KFold(5)
        ridgecv = RidgeCV(alphas=alphas, cv=cv)
        ridgecv.fit(X, y, sample_weight=sample_weight)
        # Check using GridSearchCV directly
        parameters = {'alpha': alphas}
        fit_params = {'sample_weight': sample_weight}
        gs = GridSearchCV(Ridge(), parameters, fit_params=fit_params,
                          cv=cv)
        gs.fit(X, y)
        assert_equal(ridgecv.alpha_, gs.best_estimator_.alpha)
        assert_array_almost_equal(ridgecv.coef_, gs.best_estimator_.coef_)
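# Note: the fit_params constructor argument used above was deprecated and then
# removed in newer scikit-learn releases; the current equivalent is to pass
# fit parameters to fit directly:
gs = GridSearchCV(Ridge(), parameters, cv=cv)
gs.fit(X, y, sample_weight=sample_weight)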
def print_training_summary(self, gs):
    print('The best CV score from GridSearchCV (by default averaging across k-fold CV) for ' + self.output_column + ' is:')
    if self.took_log_of_y:
        print('    Note that this score is calculated using the natural logs of the y values.')
    print(gs.best_score_)
    print('The best params were')
    # Remove 'final_model__model' from what we print: it is redundant with the
    # model name, and hard to read quickly in a list since it is a Python object.
    if 'model' in gs.best_params_:
        printing_copy = {}
        for k, v in gs.best_params_.items():
            if k != 'model':
                printing_copy[k] = v
            else:
                printing_copy[k] = utils_models.get_name_from_model(v)
    else:
        printing_copy = gs.best_params_
    print(printing_copy)
    if self.verbose:
        print('Here are all the hyperparameters that were tried:')
        raw_scores = gs.grid_scores_
        sorted_scores = sorted(raw_scores, key=lambda x: x[1], reverse=True)
        for score in sorted_scores:
            for k, v in score[0].items():
                if k == 'model':
                    score[0][k] = utils_models.get_name_from_model(v)
            print(score)
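# grid_scores_ was removed in scikit-learn 0.20 in favor of cv_results_; a
# hedged sketch of the equivalent "print all tried hyperparameters" loop for
# newer releases:
import numpy as np

results = gs.cv_results_
order = np.argsort(results['mean_test_score'])[::-1]  # best score first
for i in order:
    print(results['params'][i], results['mean_test_score'][i])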
def test_model_assessment():
    X, y = make_classification(n_samples=40, n_features=100, n_informative=2,
                               n_classes=2, n_redundant=0)
    pipe = Pipeline([('enet', ElasticNetFeatureSelection()),
                     ('ridge', RidgeClassifier())])
    # l1_ratio=2 lies outside [0, 1], so every inner fit is expected to fail
    # and no results should be collected.
    ma = ModelAssessment(GridSearchCV(pipe, {'enet__l1_ratio': [2]})).fit(X, y)
    assert len(ma.cv_results_) == 0
def _get_best_params(obj):
    # If obj is a ModelAssessment, get its best-scoring GridSearchCV first
    if isinstance(obj, ModelAssessment):
        obj = pd.DataFrame(obj.cv_results_).sort_values(
            'test_score', ascending=False).iloc[0].estimator
    elif not isinstance(obj, GridSearchCV):
        raise NotImplementedError("This can only work with a ModelAssessment "
                                  "or GridSearchCV object. You passed "
                                  "a %s object" % obj.__class__.__name__)
    return obj.best_params_
def cv_results_(self):
    """Get GridSearchCV results."""
    check_is_fitted(self, 'gs_')
    return self.gs_.cv_results_

def best_params_(self):
    """Get GridSearchCV best_params."""
    check_is_fitted(self, 'gs_')
    return self.gs_.best_params_
def tune_n_estimators_cv(estimator, params, X_train, Y_train):
    grid_search = GridSearchCV(estimator, param_grid=params, scoring='roc_auc',
                               n_jobs=-1, cv=10, verbose=10)
    grid_search.fit(X_train, Y_train)
    return grid_search.best_params_
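# A hedged usage sketch; GradientBoostingClassifier and the n_estimators grid
# are illustrative stand-ins, and roc_auc scoring assumes a binary target.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X_train, Y_train = make_classification(n_samples=500, random_state=0)
best = tune_n_estimators_cv(GradientBoostingClassifier(random_state=0),
                            {'n_estimators': [50, 100, 200]},
                            X_train, Y_train)
print(best)  # e.g. {'n_estimators': 100}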