def fit(self, X, y=None):
    """Run fit on the estimator with randomly drawn parameters.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples] or [n_samples, n_output], optional
        Target relative to X for classification or regression;
        None for unsupervised learning.
    """
    sampled_params = ParameterSampler(self.param_distributions,
                                      self.n_iter,
                                      random_state=self.random_state)
    # the superclass handles the X, y validation
    return self._fit(X, y, sampled_params)
Python examples of the ParameterSampler() class: source code excerpts.
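Before the excerpts, a minimal sketch of what ParameterSampler itself produces may help: it yields n_iter parameter dicts, sampling scipy distributions via .rvs() and lists uniformly at random. In current scikit-learn it lives in sklearn.model_selection (older releases exposed it from sklearn.grid_search, which is what these legacy excerpts likely used); the distribution values below are purely illustrative.

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

param_distributions = {
    'C': uniform(loc=0.1, scale=10.0),  # continuous: sampled via .rvs()
    'kernel': ['linear', 'rbf'],        # discrete: sampled uniformly
}
for params in ParameterSampler(param_distributions, n_iter=3, random_state=0):
    print(params)  # e.g. {'C': 5.59, 'kernel': 'rbf'}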
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ParameterSampler

def random_search(clf, param_distribution, n_iter_search, X_train, y_train):
    '''
    Random search over param_distribution, without nested resampling;
    each candidate is scored by its accuracy on the training data.
    @return: best_estimator, best_score
    '''
    param_list = ParameterSampler(param_distribution, n_iter=n_iter_search)
    best_score = 0.0
    opt_clf = None
    for params in param_list:
        clf.set_params(**params)
        clf.fit(X_train, y_train)
        clf_accuracy = accuracy_score(y_train, clf.predict(X_train))
        if clf_accuracy > best_score:
            best_score = clf_accuracy
            opt_clf = clone(clf)
    # refit the best (unfitted) clone once before returning it
    opt_clf.fit(X_train, y_train)
    return opt_clf, best_score
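A hypothetical call to random_search, assuming a scikit-learn classifier and purely discrete distributions; the dataset and parameter ranges are illustrative. Note that the returned score is training accuracy, so it is an optimistic estimate.

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

X_train, y_train = load_iris(return_X_y=True)
param_distribution = {'max_depth': [2, 4, 8], 'min_samples_leaf': [1, 5, 10]}
best_clf, best_acc = random_search(DecisionTreeClassifier(), param_distribution,
                                   n_iter_search=5, X_train=X_train, y_train=y_train)
print(best_acc)  # accuracy of the best sampled configuration on the training data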
def __get_param_iterable(self, param_grid):
    if self.ramdonized_search_enable:
        parameter_iterable = ParameterSampler(param_grid,
                                              self.randomized_search_n_iter,
                                              random_state=self.ramdonized_search_random_state)
    else:
        parameter_iterable = ParameterGrid(param_grid)
    return parameter_iterable
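The toggle above switches between an exhaustive sweep and a random subset of the same search space; a minimal sketch of the difference, with illustrative values:

from sklearn.model_selection import ParameterGrid, ParameterSampler

grid = {'alpha': [0.01, 0.1, 1.0], 'fit_intercept': [True, False]}
print(len(list(ParameterGrid(grid))))                                # 6: every combination
print(len(list(ParameterSampler(grid, n_iter=3, random_state=42))))  # 3: a random subset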
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)
    return self._fit(frame, sampled_params)
def fit(self, frame):
    """Fit the grid search.

    Parameters
    ----------
    frame : H2OFrame, shape=(n_samples, n_features)
        The training frame on which to fit.
    """
    sampled_params = ParameterSampler(self.param_grid,
                                      self.n_iter,
                                      random_state=self.random_state)

    # set our score class
    self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)

    # we can do this once, to avoid many as_data_frame operations
    exp, loss, prem = _val_exp_loss_prem(self.exposure_feature, self.loss_feature, self.premium_feature)
    self.extra_args_ = {
        'expo': _as_numpy(frame[exp]),
        'loss': _as_numpy(frame[loss]),
        'prem': _as_numpy(frame[prem]) if prem is not None else None
    }

    # for the validation set
    self.extra_names_ = {
        'expo': exp,
        'loss': loss,
        'prem': prem
    }

    # do the fit
    the_fit = self._fit(frame, sampled_params)

    # clear extra_args_, because they might take a lot of memory;
    # we can do this because a re-fit will re-assign them anyway.
    # don't delete extra_names_, though, because they're used when
    # scoring the incoming frame.
    del self.extra_args_
    return the_fit
def generate_models(self, input_shape, output_dim):
    loss_type = self.grid.params_grid["loss"][0]
    for layers in self.create_network_structures(self.grid.params_grid["layers"],
                                                 self.grid.params_grid["layer_nums"],
                                                 input_shape):
        print("Current network: %s" % "->".join(layers))
        flat_params_grid = self.grid.create_flat_layers_grid(layers, input_shape, output_dim)
        for optimizer_name in self.grid.params_grid["optimizers"]:
            flat_grid = flat_params_grid.copy()
            flat_grid.update(self.grid.create_flat_optimizer_grid(optimizer_name))
            n_samples = min(self.params_sample_size, len(ParameterGrid(flat_grid)))
            for params in ParameterSampler(flat_grid, n_samples):
                nn_params = self.grid.fold_params(params)
                yield self.model_factory.create_model(layers, nn_params, loss_type)
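The min(...) cap above matters because, for an all-discrete grid, ParameterSampler cannot draw more unique combinations than the grid contains; depending on the scikit-learn version, exceeding the grid size either raises an error or triggers a warning. A small sketch with an illustrative grid:

from sklearn.model_selection import ParameterGrid, ParameterSampler

flat_grid = {'units': [32, 64], 'dropout': [0.0, 0.5]}  # 4 combinations in total
n_samples = min(10, len(ParameterGrid(flat_grid)))      # capped at 4
assert len(list(ParameterSampler(flat_grid, n_iter=n_samples))) == 4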
# Example.
def main():
    args = parse_cli()
    config = yread(args.conf)
    config['experiment_name'] = {}
    clfs, param_space, _ = get_param_space()
    for clf_name in clfs:
        print("processing {}".format(clf_name))
        # sample max_evals parameter settings
        param_iter = ParameterSampler(param_space[clf_name], n_iter=config['max_evals'])
        # create the database collection's skeleton
        experiment_name = param_space_to_experiment_name(clf_name, param_space[clf_name])
        jobs = JobsDB(config['project_name'], experiment_name,
                      host=config['MongoDB']['host'],
                      port=config['MongoDB']['port'])
        jobs.create_jobs(config['features'], param_iter)
        # record the database collection's name
        config['experiment_name'][clf_name] = experiment_name
    # add the collections' info to the config
    ywrite(config, args.save)
def main():
    args = parse_cli()
    config = yread(args.conf)
    # sample parameters
    param_space = get_param_space()
    param_iter = ParameterSampler(param_space, n_iter=config['number_examples'])
    # create the MongoDB collection
    jobs = JobsDB(config['project_name'],
                  config['experiment_name'],
                  host=config['MongoDB']['host'],
                  port=config['MongoDB']['port'])
    jobs.create_jobs(None, param_iter)
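Both main variants persist the sampled configurations for later workers instead of fitting immediately. JobsDB is project-specific and its schema is not shown here, but since ParameterSampler yields plain dicts, the underlying idea can be sketched generically with pymongo (collection names and fields below are made up for illustration):

from pymongo import MongoClient
from sklearn.model_selection import ParameterSampler

client = MongoClient('localhost', 27017)
jobs = client['project_name']['experiment_name']  # database / collection
for params in ParameterSampler({'alpha': [0.1, 1.0]}, n_iter=2, random_state=0):
    jobs.insert_one({'params': params, 'status': 'pending'})  # one job per sample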
Source: modifiedRandomizedSearchCV.py, from the project CerebralCortex-2.0-legacy by MD2Korg.
def fit(self, X, y):
    """Actual fitting, performing the search over parameters."""
    parameter_iterable = ParameterSampler(self.param_distributions,
                                          self.n_iter,
                                          random_state=self.random_state)
    estimator = self.estimator
    cv = self.cv

    n_samples = _num_samples(X)
    X, y = indexable(X, y)

    if y is not None:
        if len(y) != n_samples:
            raise ValueError('Target variable (y) has a different number '
                             'of samples (%i) than data (X: %i samples)'
                             % (len(y), n_samples))
    cv = check_cv(cv, X, y, classifier=is_classifier(estimator))

    if self.verbose > 0:
        if isinstance(parameter_iterable, Sized):
            n_candidates = len(parameter_iterable)
            print("Fitting {0} folds for each of {1} candidates, totalling"
                  " {2} fits".format(len(cv), n_candidates,
                                     n_candidates * len(cv)))

    base_estimator = clone(self.estimator)
    pre_dispatch = self.pre_dispatch

    out = Parallel(
        n_jobs=self.n_jobs, verbose=self.verbose,
        pre_dispatch=pre_dispatch
    )(
        delayed(cv_fit_and_score)(clone(base_estimator), X, y, self.scoring,
                                  parameters, cv=cv)
        for parameters in parameter_iterable)

    best = sorted(out, reverse=True)[0]
    self.best_params_ = best[1]
    self.best_score_ = best[0]

    if self.refit:
        # fit the best estimator using the entire dataset
        # clone first to work around broken estimators
        best_estimator = clone(base_estimator).set_params(**best[1])
        if y is not None:
            best_estimator.fit(X, y, **self.fit_params)
        else:
            best_estimator.fit(X, **self.fit_params)
        self.best_estimator_ = best_estimator
    return self
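The sort at the end assumes each element of out orders by score first, i.e. that cv_fit_and_score returns (score, parameters). The real helper is not shown in this excerpt; what follows is a hypothetical, compatible sketch, assuming the legacy cv object iterates over (train, test) index arrays and that X and y are NumPy arrays.

from sklearn.metrics import get_scorer

def cv_fit_and_score(estimator, X, y, scoring, parameters, cv):
    # hypothetical stand-in: returns (mean_cv_score, parameters) so that
    # sorted(out, reverse=True)[0] picks the best candidate
    scorer = get_scorer(scoring)
    estimator.set_params(**parameters)
    scores = []
    for train_idx, test_idx in cv:
        estimator.fit(X[train_idx], y[train_idx])
        scores.append(scorer(estimator, X[test_idx], y[test_idx]))
    return sum(scores) / len(scores), parameters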