def cross_validate(self, X, y):
    print("fitting {} to the training set".format(self.name))
    if self.param_grid is not None:
        param_sets = list(ParameterGrid(self.param_grid))
        n_param_sets = len(param_sets)
        param_scores = []
        for j, param_set in enumerate(param_sets):
            print("--------------")
            print("training the classifier...")
            print("parameter set:")
            for k, v in param_set.items():
                print("{}: {}".format(k, v))
            param_score = self.evaluate(X, y, param_set=param_set)
            param_scores.append(param_score)
        p = np.argmax(np.array(param_scores))
        self.best_param_set = param_sets[p]
        print("best parameter set:", self.best_param_set)
        print("best score:", param_scores[p])
    else:
        score = self.evaluate(X, y)
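For reference, a minimal self-contained sketch of what ParameterGrid yields for a small grid (the modern import path is shown; older scikit-learn exposed it as sklearn.grid_search.ParameterGrid):

from sklearn.model_selection import ParameterGrid

param_grid = {"C": [0.1, 1.0], "kernel": ["linear", "rbf"]}
for param_set in ParameterGrid(param_grid):
    print(param_set)
# {'C': 0.1, 'kernel': 'linear'}
# {'C': 0.1, 'kernel': 'rbf'}
# {'C': 1.0, 'kernel': 'linear'}
# {'C': 1.0, 'kernel': 'rbf'}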
def __get_param_iterable(self, param_grid):
    if self.randomized_search_enable:
        parameter_iterable = ParameterSampler(param_grid,
                                              self.randomized_search_n_iter,
                                              random_state=self.randomized_search_random_state)
    else:
        parameter_iterable = ParameterGrid(param_grid)
    return parameter_iterable
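A quick sketch of the two iterables this helper switches between: ParameterGrid enumerates every combination, while ParameterSampler draws a fixed number of candidates (without replacement when all values are lists):

from sklearn.model_selection import ParameterGrid, ParameterSampler

space = {"alpha": [0.01, 0.1, 1.0], "fit_intercept": [True, False]}
print(len(ParameterGrid(space)))                                # 6 combinations
print(list(ParameterSampler(space, n_iter=3, random_state=0)))  # 3 random ones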
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    return self._fit(frame, ParameterGrid(self.param_grid))
def fit(self, X, y=None):
    """Run fit with all sets of parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    """
    return self._fit(X, y, ParameterGrid(self.param_grid))
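ParameterGrid also supports len() and indexing, and accepts a list of dicts to search disjoint sub-grids; a small sketch:

from sklearn.model_selection import ParameterGrid

grid = ParameterGrid([{"kernel": ["linear"], "C": [1, 10]},
                      {"kernel": ["rbf"], "C": [1, 10], "gamma": [0.1, 1.0]}])
print(len(grid))  # 2 + 4 = 6 candidates
print(grid[0])    # grids are indexable as well as iterable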
def __init__(self, experiment, args, job_module_config):
    super(self.__class__, self).__init__(experiment, args, job_module_config)
    # Pre-format the experiment dict: sklearn needs every param value to be
    # a list for ParameterGrid to work properly.
    for param in experiment['params']:
        if not isinstance(experiment['params'][param], list):
            experiment['params'][param] = [experiment['params'][param]]
    self.searcher = ParameterGrid(experiment['params'])
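The wrapping above matters because ParameterGrid rejects bare scalars; every value must be a sequence of candidates. A minimal sketch of the same normalization:

from sklearn.model_selection import ParameterGrid

params = {"lr": 0.01, "layers": [64, 128]}  # 'lr' is a bare scalar
params = {k: v if isinstance(v, list) else [v] for k, v in params.items()}
print(list(ParameterGrid(params)))
# [{'layers': 64, 'lr': 0.01}, {'layers': 128, 'lr': 0.01}]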
def __init__(self, params, progressbar=None):
    if progressbar is not None:
        self.progress = progressbar
    print(params)
    self.paramgrid = list(ParameterGrid(params))  # create a grid of parameter permutations
    # self.paramgrid = ParameterGrid(params).param_grid
def search_test_params(base_clf, cv_params, X, y, train, test, scoring):
    parameter_iterable = ParameterGrid(cv_params)
    grid_scores = Parallel(n_jobs=-1)(
        delayed(_fit_and_score)(clone(base_clf), X, y, scoring,
                                train, test, 0, parameters,
                                None, return_parameters=True)
        for parameters in parameter_iterable)
    # Serial equivalent:
    # grid_scores = [_fit_and_score(clone(base_clf), X, y, scoring, train, test,
    #                               0, parameters, None, return_parameters=True)
    #                for parameters in parameter_iterable]
    grid_scores = sorted(grid_scores, key=lambda x: x[0], reverse=True)
    scores, _, _, parameters = grid_scores[0]
    return scores, parameters
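A stripped-down sketch of the joblib pattern used above, with a hypothetical stand-in for _fit_and_score (which lives in sklearn's internals and is not a public API):

from joblib import Parallel, delayed
from sklearn.model_selection import ParameterGrid

def fit_and_score_stub(params):
    # Hypothetical stand-in: returns (score, params) like the sorted tuples above.
    return (sum(params.values()), params)

results = Parallel(n_jobs=-1)(
    delayed(fit_and_score_stub)(p)
    for p in ParameterGrid({"a": [1, 2], "b": [3, 4]}))
best_score, best_params = max(results, key=lambda r: r[0])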
def generate_models(self, input_shape, output_dim):
    loss_type = self.grid.params_grid["loss"][0]
    for layers in self.create_network_structures(self.grid.params_grid["layers"],
                                                 self.grid.params_grid["layer_nums"],
                                                 input_shape):
        print("Current network: %s" % "->".join(layers))
        flat_params_grid = self.grid.create_flat_layers_grid(layers, input_shape, output_dim)
        for optimizer_name in self.grid.params_grid["optimizers"]:
            flat_grid = flat_params_grid.copy()
            flat_grid.update(self.grid.create_flat_optimizer_grid(optimizer_name))
            for params in ParameterGrid(flat_grid):
                nn_params = self.grid.fold_params(params)
                yield self.model_factory.create_model(layers, nn_params, loss_type)
# Example.
def generate_models(self, input_shape, output_dim):
    loss_type = self.grid.params_grid["loss"][0]
    for layers in self.create_network_structures(self.grid.params_grid["layers"],
                                                 self.grid.params_grid["layer_nums"],
                                                 input_shape):
        print("Current network: %s" % "->".join(layers))
        flat_params_grid = self.grid.create_flat_layers_grid(layers, input_shape, output_dim)
        for optimizer_name in self.grid.params_grid["optimizers"]:
            flat_grid = flat_params_grid.copy()
            flat_grid.update(self.grid.create_flat_optimizer_grid(optimizer_name))
            # Cap the number of sampled candidates at the full grid size.
            n_samples = min(self.params_sample_size, len(ParameterGrid(flat_grid)))
            for params in ParameterSampler(flat_grid, n_samples):
                nn_params = self.grid.fold_params(params)
                yield self.model_factory.create_model(layers, nn_params, loss_type)
# Example.
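Beyond capping n_samples against the grid size as above, ParameterSampler also accepts continuous scipy.stats distributions, in which case the space is effectively unbounded; a short sketch:

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

sampler = ParameterSampler({"lr": uniform(1e-4, 1e-2), "dropout": [0.3, 0.5]},
                           n_iter=3, random_state=0)
for params in sampler:
    print(params)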
def test_iforest():
    """Check Isolation Forest for various parameter settings."""
    rng = check_random_state(0)
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])
    grid = ParameterGrid({"n_estimators": [3],
                          "max_samples": [0.5, 1.0, 3],
                          "bootstrap": [True, False]})
    with ignore_warnings():
        for params in grid:
            IsolationForest(random_state=rng,
                            **params).fit(X_train).predict(X_test)
def test_iforest_sparse():
    """Check IForest for various parameter settings on sparse input."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(boston.data[:50],
                                                        boston.target[:50],
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "bootstrap": [True, False]})
    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        for params in grid:
            # Trained on sparse format
            sparse_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train_sparse)
            sparse_results = sparse_classifier.predict(X_test_sparse)
            # Trained on dense format
            dense_classifier = IsolationForest(
                n_estimators=10, random_state=1, **params).fit(X_train)
            dense_results = dense_classifier.predict(X_test)
            assert_array_equal(sparse_results, dense_results)
def main(task_num, sample_size=''):
    embedding_size = 100
    epoch = 300
    best_accuracy = 0.0
    grid_results = {}
    if not os.path.exists('data/ris/task_{}'.format(task_num)):
        os.makedirs('data/ris/task_{}'.format(task_num))
    param_grid = {'nb': [20, 30],
                  'lr': [0.001],
                  'tr': [[0, 0, 0, 0]],
                  'L2': [0.0, 0.0001],  # [0.0, 0.1, 0.01, 0.001, 0.0001]
                  'bz': [32],
                  'dr': [0.5],
                  }
    grid = list(ParameterGrid(param_grid))
    np.random.shuffle(grid)
    for params in grid:
        data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size), int(task_num))
        # for sentence
        par = get_parameters(data, epoch, data._data['sent_len'], data._data['sent_numb'],
                             embedding_size, params)
        t = train(epoch, params['bz'], data, par, dr=params['dr'], _test=False)
        acc = sorted([v for k, v in t[5].items()])[-1]
        if acc > best_accuracy:
            best_accuracy = acc
        grid_results[str(params)] = acc
        f_save = 'data/ris/task_{}/{}.PIK'.format(task_num, str(params) + str(acc))
        with open(f_save, 'wb') as f:  # binary mode for pickle
            pickle.dump(t, f)
    # batch_size = 32
    # epoch = 200
    # if not os.path.exists('data/ris/task_{}'.format(task_num)):
    #     os.makedirs('data/ris/task_{}'.format(task_num))
    # data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size), int(task_num))
def main(task_num, sample_size=''):
    embedding_size = 100
    epoch = 300
    best_accuracy = 0.0
    grid_results = {}
    if not os.path.exists('data/ris/task_{}'.format(task_num)):
        os.makedirs('data/ris/task_{}'.format(task_num))
    param_grid = {'nb': [20],
                  'lr': [0.001],
                  'tr': [[0, 0, 0, 0]],
                  'L2': [0.001],  # [0.0, 0.1, 0.01, 0.001, 0.0001]
                  'bz': [32],
                  'dr': [0.5],
                  }
    grid = list(ParameterGrid(param_grid))
    np.random.shuffle(grid)
    for params in grid:
        data = Dataset('data/tasks_1-20_v1-2/en-valid{}/'.format(sample_size), int(task_num))
        # for sentence
        par = get_parameters(data, epoch, data._data['sent_len'], data._data['sent_numb'],
                             embedding_size, params)
        t = train(epoch, params['bz'], data, par, dr=params['dr'], _test=True)
        acc = sorted([v for k, v in t[5].items()])[-1]
        if acc > best_accuracy:
            best_accuracy = acc
        grid_results[str(params)] = acc
        f_save = 'data/ris/task_{}/{}.PIK'.format(task_num, str(params) + str(acc))
        with open(f_save, 'wb') as f:  # binary mode for pickle
            pickle.dump(t, f)
def main():
    embedding_size = 100
    epoch = 300
    best_accuracy = 0.0
    sent_numb, sent_len = None, None
    grid_results = {}
    param_grid = {'nb': [5],
                  'lr': [0.01, 0.001, 0.0001],
                  'tr': [[1, 1, 0, 0]],
                  'L2': [0.001, 0.0001],
                  'bz': [64],
                  'dr': [0.5],
                  'mw': [150],
                  'w': [3, 4, 5],
                  'op': ['Adam']
                  }
    grid = list(ParameterGrid(param_grid))
    np.random.shuffle(grid)
    for params in grid:
        data = Dataset(train_size=10000, dev_size=None, test_size=None, sent_len=sent_len,
                       sent_numb=sent_numb, embedding_size=embedding_size,
                       max_windows=params['mw'], win=params['w'])
        # for sentence:
        #   par = get_parameters(data, epoch, sent_len, sent_numb, embedding_size)
        # for windows:
        par = get_parameters(data, epoch, (params['w'] * 2) + 1, params['mw'],
                             embedding_size, params)
        t = train(epoch, params['bz'], data, par, dr=params['dr'], _test=False)
        acc = sorted([v for k, v in t[3].items()])[-1]
        if acc > best_accuracy:
            best_accuracy = acc
        grid_results[str(params)] = acc
        f_save = 'checkpoints/CNN_WIND/{}.PIK'.format(str(params) + str(acc))
        with open(f_save, 'wb') as f:  # binary mode for pickle
            pickle.dump(t, f)
def cv_trials(X, y, folds, model, hyper):
    N = len(y)
    cv_scores = []
    predictions = {
        'pred': np.zeros(N, dtype=bool),
        'proba': np.zeros(N),
        'foldno': np.zeros(N, dtype=np.int32) - 1,
    }
    pg = list(ParameterGrid(hyper))
    for foldno, (train, val, test) in enumerate(folds):
        train_X, train_y = X[train], y[train]
        val_X, val_y = X[val], y[val]
        test_X, test_y = X[test], y[test]
        # Select hyperparameters on the validation split.
        best_params = None
        best_val_f1 = None
        for these_params in pg:
            model.set_params(**these_params)
            model.fit(train_X, train_y)
            this_val_f1 = metrics.f1_score(val_y, model.predict(val_X), average="weighted")
            if not best_params or this_val_f1 > best_val_f1:
                best_params = these_params
                best_val_f1 = this_val_f1
        # Refit with the winning parameters before scoring the test split.
        if len(pg) > 1:
            model.set_params(**best_params)
            model.fit(train_X, train_y)
        train_f1 = metrics.f1_score(train_y, model.predict(train_X), average="weighted")
        preds_y = model.predict(test_X)
        predictions['pred'][test] = preds_y
        predictions['foldno'][test] = foldno
        fold_eval = {'f1': metrics.f1_score(test_y, preds_y, average="weighted"),
                     'p': metrics.precision_score(test_y, preds_y, average="weighted"),
                     'r': metrics.recall_score(test_y, preds_y, average="weighted"),
                     'a': metrics.accuracy_score(test_y, preds_y)}
        print("[%02d] Best hyper [train %.3f -> val %.3f -> test %.3f] %s"
              % (foldno, train_f1, best_val_f1, fold_eval['f1'], best_params))
        cv_scores.append(fold_eval)
    np.set_printoptions(suppress=True)
    # Now compute global (pooled) evaluations and consolidate per-fold metrics.
    cv_scores = consolidate(cv_scores)
    preds_y = predictions['pred']
    pooled_eval = {'f1': metrics.f1_score(y, preds_y, average="weighted"),
                   'p': metrics.precision_score(y, preds_y, average="weighted"),
                   'r': metrics.recall_score(y, preds_y, average="weighted"),
                   'a': metrics.accuracy_score(y, preds_y)}
    return pooled_eval, predictions, cv_scores
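consolidate is not defined in this snippet; a plausible minimal sketch, under the assumption that it simply averages each metric across folds (not the original implementation):

import numpy as np

def consolidate(fold_scores):
    # Collapse a list of per-fold metric dicts into one dict of mean values.
    return {k: float(np.mean([fold[k] for fold in fold_scores]))
            for k in fold_scores[0]}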
# modifiedGridSearchCV.py, from the CerebralCortex-2.0-legacy project (author: MD2Korg)
def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterGrid(self.param_grid)
    estimator = self.estimator
    cv = self.cv
    n_samples = _num_samples(X)
    X, y = indexable(X, y)
    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))
    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))
    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch
    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                parameters, cv=cv)
      for parameters in parameter_iterable)
    # Sort candidate results by mean score; the last element is the best.
    best = sorted(out, key=lambda x: x[0])[-1]
    self.best_params_ = best[1]
    self.best_score_ = best[0]
    if self.refit:
        # Fit the best estimator using the entire dataset;
        # clone first to work around broken estimators.
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
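cv_fit_and_score is likewise not shown here; from the sort key above it must return a tuple whose first element is the mean score and whose second is the parameter dict. A minimal stand-in under that assumption (old-style sklearn cv objects iterate (train, test) index pairs):

import numpy as np
from sklearn.base import clone

def cv_fit_and_score(estimator, X, y, scoring, parameters, cv):
    scores = []
    for train_idx, test_idx in cv:  # old-style cv objects yield index pairs
        est = clone(estimator).set_params(**parameters)
        est.fit(X[train_idx], y[train_idx])
        scores.append(est.score(X[test_idx], y[test_idx]))  # `scoring` ignored in this sketch
    return (np.mean(scores), parameters)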