def fit(self, X, y=None):
"""Run fit on the estimator with randomly drawn parameters.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples] or [n_samples, n_output], optional
Target relative to X for classification or regression;
None for unsupervised learning.
"""
sampled_params = ParameterSampler(self.param_distributions,
self.n_iter,
random_state=self.random_state)
# the super class will handle the X, y validation
return self._fit(X, y, sampled_params)
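
For context, a minimal sketch of what iterating a scikit-learn ParameterSampler yields; the distributions and values below are assumptions for illustration, and `_fit` above belongs to the surrounding class and is not shown.

from scipy.stats import uniform
from sklearn.model_selection import ParameterSampler

# Each draw is a dict mapping parameter names to concrete sampled values.
distributions = {"C": uniform(0, 10), "kernel": ["rbf", "linear"]}
for params in ParameterSampler(distributions, n_iter=3, random_state=0):
    print(params)  # e.g. {'C': 5.48..., 'kernel': 'rbf'}
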
def sample_hyperparameters(random_state, num):
space = {
'n_iter': N_ITER,
'batch_size': BATCH_SIZE,
'l2': L2,
'learning_rate': LEARNING_RATES,
'loss': LOSSES,
'embedding_dim': EMBEDDING_DIM,
}
sampler = ParameterSampler(space,
n_iter=num,
random_state=random_state)
for params in sampler:
yield params
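
The hyper-parameter constants referenced above (N_ITER, BATCH_SIZE, and the rest) are module-level names defined elsewhere in the original code. A plausible shape for them, purely as an assumption, is a list of candidate values per hyper-parameter, which ParameterSampler draws from uniformly; the same constants are reused by the LSTM and pooling samplers below.

# Assumed definitions; the original values are not shown in these snippets.
N_ITER = [5, 10, 20]
BATCH_SIZE = [128, 256, 512]
L2 = [0.0, 1e-6, 1e-5]
LEARNING_RATES = [1e-4, 1e-3, 1e-2]
LOSSES = ['bpr', 'hinge']
EMBEDDING_DIM = [32, 64, 128]

# Usage: draw five settings reproducibly.
for params in sample_hyperparameters(random_state=42, num=5):
    print(params)
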
def sample_cnn_hyperparameters(random_state, num):
space = {
'n_iter': N_ITER,
'batch_size': BATCH_SIZE,
'l2': L2,
'learning_rate': LEARNING_RATES,
'loss': LOSSES,
'embedding_dim': EMBEDDING_DIM,
'kernel_width': [3, 5, 7],
'num_layers': list(range(1, 10)),
'dilation_multiplier': [1, 2],
'nonlinearity': ['tanh', 'relu'],
'residual': [True, False]
}
sampler = ParameterSampler(space,
n_iter=num,
random_state=random_state)
for params in sampler:
        # Dilations grow exponentially with depth, wrapping every 8 layers.
        params['dilation'] = [params['dilation_multiplier'] ** (i % 8)
                              for i in range(params['num_layers'])]
yield params
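
A worked example of the dilation schedule computed in the loop above: with a multiplier of 2, the per-layer dilations grow exponentially and wrap around every 8 layers.

multiplier, num_layers = 2, 10
dilation = [multiplier ** (i % 8) for i in range(num_layers)]
print(dilation)  # [1, 2, 4, 8, 16, 32, 64, 128, 1, 2]
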
def sample_lstm_hyperparameters(random_state, num):
space = {
'n_iter': N_ITER,
'batch_size': BATCH_SIZE,
'l2': L2,
'learning_rate': LEARNING_RATES,
'loss': LOSSES,
'embedding_dim': EMBEDDING_DIM,
}
sampler = ParameterSampler(space,
n_iter=num,
random_state=random_state)
for params in sampler:
yield params
def sample_pooling_hyperparameters(random_state, num):
space = {
'n_iter': N_ITER,
'batch_size': BATCH_SIZE,
'l2': L2,
'learning_rate': LEARNING_RATES,
'loss': LOSSES,
'embedding_dim': EMBEDDING_DIM,
}
sampler = ParameterSampler(space,
n_iter=num,
random_state=random_state)
for params in sampler:
yield params
def test_param_sampler():
# test basic properties of param sampler
param_distributions = {"kernel": ["rbf", "linear"],
"C": uniform(0, 1)}
sampler = ParameterSampler(param_distributions=param_distributions,
n_iter=10, random_state=0)
samples = [x for x in sampler]
assert_equal(len(samples), 10)
for sample in samples:
assert_true(sample["kernel"] in ["rbf", "linear"])
assert_true(0 <= sample["C"] <= 1)
# test that repeated calls yield identical parameters
param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}
sampler = ParameterSampler(param_distributions=param_distributions,
n_iter=3, random_state=0)
assert_equal([x for x in sampler], [x for x in sampler])
if sp_version >= (0, 16):
param_distributions = {"C": uniform(0, 1)}
sampler = ParameterSampler(param_distributions=param_distributions,
n_iter=10, random_state=0)
assert_equal([x for x in sampler], [x for x in sampler])
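
These tests lean on helpers from the scikit-learn test suite; the imports were roughly as follows, though the exact paths varied across scikit-learn versions, so treat this as an assumption.

from scipy.stats import uniform, bernoulli
from sklearn.model_selection import ParameterGrid, ParameterSampler
from sklearn.utils.fixes import sp_version
from sklearn.utils.testing import assert_equal, assert_true, assert_raises
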
def fit(self, frame):
"""Fit the grid search.
Parameters
----------
frame : H2OFrame, shape=(n_samples, n_features)
The training frame on which to fit.
"""
sampled_params = ParameterSampler(self.param_grid,
self.n_iter,
random_state=self.random_state)
return self._fit(frame, sampled_params)
def fit(self, frame):
"""Fit the grid search.
Parameters
----------
frame : H2OFrame, shape=(n_samples, n_features)
The training frame on which to fit.
"""
sampled_params = ParameterSampler(self.param_grid,
self.n_iter,
random_state=self.random_state)
# set our score class
self.scoring_class_ = GainsStatisticalReport(**self.grsttngs_)
# we can do this once to avoid many as_data_frame operations
exp, loss, prem = _val_exp_loss_prem(self.exposure_feature, self.loss_feature, self.premium_feature)
self.extra_args_ = {
'expo': _as_numpy(frame[exp]),
'loss': _as_numpy(frame[loss]),
'prem': _as_numpy(frame[prem]) if prem is not None else None
}
# for validation set
self.extra_names_ = {
'expo': exp,
'loss': loss,
'prem': prem
}
# do fit
the_fit = self._fit(frame, sampled_params)
    # clear extra_args_, because they might take up a lot of memory;
    # we can do this because a re-fit will re-assign them anyway.
    # don't delete extra_names_, though, because they're used when
    # scoring the incoming frame.
del self.extra_args_
return the_fit
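
The cache-then-delete pattern above in miniature: stash an expensive conversion on the instance for the duration of the fit, then drop it so the fitted object stays small. The class and names here are illustrative, not the original library's API.

import numpy as np

class CachedFit:
    def fit(self, frame):
        # Expensive conversion, cached only for the duration of the fit.
        self.extra_args_ = {'expo': np.asarray(frame['expo'])}
        self.total_ = float(self.extra_args_['expo'].sum())  # stand-in for _fit
        del self.extra_args_  # a re-fit re-assigns it; keeps the object light
        return self

CachedFit().fit({'expo': [1.0, 2.0, 3.0]})
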
def _get_param_iterator(self):
"""Return ParameterSampler instance for the given distributions"""
return model_selection.ParameterSampler(self.param_distributions,
self.n_iter,
random_state=self.random_state)
def test_parameters_sampler_replacement():
# raise error if n_iter too large
params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
sampler = ParameterSampler(params, n_iter=7)
assert_raises(ValueError, list, sampler)
    # degenerates to GridSearchCV if n_iter is the same as grid_size
sampler = ParameterSampler(params, n_iter=6)
samples = list(sampler)
assert_equal(len(samples), 6)
for values in ParameterGrid(params):
assert_true(values in samples)
# test sampling without replacement in a large grid
params = {'a': range(10), 'b': range(10), 'c': range(10)}
sampler = ParameterSampler(params, n_iter=99, random_state=42)
samples = list(sampler)
assert_equal(len(samples), 99)
hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
for p in samples]
assert_equal(len(set(hashable_samples)), 99)
# doesn't go into infinite loops
params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
sampler = ParameterSampler(params_distribution, n_iter=7)
samples = list(sampler)
assert_equal(len(samples), 7)
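
A sketch of the two regimes this test exercises, runnable against current scikit-learn: an all-list grid is sampled without replacement, while the presence of a scipy distribution switches to sampling with replacement, so n_iter may exceed the size of the discrete grid.

from scipy.stats import bernoulli
from sklearn.model_selection import ParameterSampler

grid = {'first': [0, 1], 'second': ['a', 'b', 'c']}
settings = list(ParameterSampler(grid, n_iter=6, random_state=0))
assert len({tuple(sorted(p.items())) for p in settings}) == 6  # all distinct

mixed = {'first': bernoulli(0.5), 'second': ['a', 'b', 'c']}
assert len(list(ParameterSampler(mixed, n_iter=7, random_state=0))) == 7
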
def maximize(self, score_optimum=None, realize=True):
"""
    Find the next best hyper-parameter setting for the optimizer.
Parameters
----------
score_optimum: float
An optional score to use inside the EI formula instead of the optimizer's current_best_score
realize: bool
Whether or not to give a more realistic estimate of the EI (default=True)
Returns
-------
best_setting: dict
The setting with the highest expected improvement
best_score: float
The highest EI (per second)
"""
start = time.time()
# Select a sample of parameters
sampled_params = ParameterSampler(self.param_distributions, self.draw_samples)
# Set score optimum
if score_optimum is None:
score_optimum = self.current_best_score
# Determine the best parameters
best_setting, best_score = self._maximize_on_sample(sampled_params, score_optimum)
if self.local_search:
best_setting, best_score = self._local_search(best_setting, best_score, score_optimum,
max_steps=self.ls_max_steps)
if realize:
best_setting, best_score = self._realize(best_setting, best_score, score_optimum)
# Store running time
running_time = (time.time() - start) / self.simulate_speedup
self.maximize_times.append(running_time)
return best_setting, best_score
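
_maximize_on_sample is not shown here. As background, the textbook expected-improvement computation it presumably evaluates for each sampled setting looks like this; a sketch of the standard formula, not the library's actual implementation.

import numpy as np
from scipy.stats import norm

def expected_improvement(mu, sigma, score_optimum):
    """EI for maximization, given posterior mean mu and std sigma."""
    sigma = np.maximum(sigma, 1e-12)  # guard against division by zero
    z = (mu - score_optimum) / sigma
    return (mu - score_optimum) * norm.cdf(z) + sigma * norm.pdf(z)

# Toy usage: a higher mean and higher uncertainty both raise the EI.
print(expected_improvement(np.array([0.8, 0.6]), np.array([0.1, 0.3]), 0.7))
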