def hyperopt_search(args, data, model, param_grid, max_evals):
    def objective(param_grid):
        # Copy the sampled hyperparameters onto the shared args namespace.
        args.num_hidden = param_grid['num_hidden']
        args.dropout_output = param_grid['dropout_output']
        args.dropout_input = param_grid['dropout_input']
        args.clip_norm = param_grid['clip_norm']
        args.batch_size = param_grid['batch_size']
        # args.learning_rate = param_grid['learning_rate']
        print(args)
        print()
        scores = run_network(args, data, model, tuning=args.tune)
        test_score, eval_score = scores
        tf.reset_default_graph()
        # hyperopt minimizes the returned loss, so negate the evaluation score.
        eval_score = -eval_score[0]
        return {'loss': eval_score, 'params': args, 'status': STATUS_OK}

    trials = Trials()
    results = fmin(
        objective, param_grid, algo=tpe.suggest,
        trials=trials, max_evals=max_evals)
    return results, trials.results
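For context, a minimal sketch of a compatible search space, assuming only the keys that objective() reads above; the distributions and ranges are illustrative, not the original project's:

from hyperopt import hp

param_grid = {
    'num_hidden': hp.choice('num_hidden', [128, 256, 512]),
    'dropout_output': hp.uniform('dropout_output', 0.2, 0.8),
    'dropout_input': hp.uniform('dropout_input', 0.2, 0.8),
    'clip_norm': hp.uniform('clip_norm', 1.0, 10.0),
    'batch_size': hp.choice('batch_size', [32, 64, 128]),
}
# results, trial_results = hyperopt_search(args, data, model, param_grid, max_evals=50)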
Python examples of the hyperopt Trials() class
def run_all_dl(csvfile=saving_fp,
               space=[hp.quniform('h1', 100, 550, 1),
                      hp.quniform('h2', 100, 550, 1),
                      hp.quniform('h3', 100, 550, 1),
                      # hp.choice('activation', ["RectifierWithDropout", "TanhWithDropout"]),
                      hp.uniform('hdr1', 0.001, 0.3),
                      hp.uniform('hdr2', 0.001, 0.3),
                      hp.uniform('hdr3', 0.001, 0.3),
                      hp.uniform('rho', 0.9, 0.999),
                      hp.uniform('epsilon', 1e-10, 1e-4)]):
    # Maxout works well with dropout (Goodfellow et al. 2013), and the rectifier
    # has worked well for image recognition (LeCun et al. 1998).
    start_save(csvfile=csvfile)
    trials = Trials()
    print("Deep learning...")
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=evals,
                trials=trials)
    print(best)
    print(trials.losses())
    # Pickle protocol -1 is binary, so the files must be opened in 'wb' mode.
    with open('output/dlbest.pkl', 'wb') as output:
        pickle.dump(best, output, -1)
    with open('output/dltrials.pkl', 'wb') as output:
        pickle.dump(trials, output, -1)
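Since hp.quniform samples are returned as floats, the layer sizes in best (h1, h2, h3) need casting before reuse; a small hedged post-processing step, not part of the original snippet:

best = {k: int(v) if k in ('h1', 'h2', 'h3') else v for k, v in best.items()}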
optimize.py — project: Hyperopt-Keras-CNN-CIFAR-100, author: guillaume-chevalier
def run_a_trial():
    """Run one TPE meta-optimisation step and save its results."""
    max_evals = nb_evals = 1

    print("Attempt to resume a past training if it exists:")
    try:
        # https://github.com/hyperopt/hyperopt/issues/267
        trials = pickle.load(open("results.pkl", "rb"))
        print("Found saved Trials! Loading...")
        max_evals = len(trials.trials) + nb_evals
        print("Rerunning from {} trials to add another one.".format(
            len(trials.trials)))
    except:  # no saved trials yet (or unreadable pickle): start fresh
        trials = Trials()
        print("Starting from scratch: new trials.")

    best = fmin(
        build_and_optimize_cnn,
        space,
        algo=tpe.suggest,
        trials=trials,
        max_evals=max_evals
    )
    pickle.dump(trials, open("results.pkl", "wb"))

    print("\nOPTIMIZATION STEP COMPLETE.\n")
    print("Best results yet (note that this is NOT calculated on the 'loss' "
          "metric even though the key is 'loss' - we rather take the negative "
          "best accuracy throughout learning as a metric to minimize):")
    print(best)
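A driver loop is implied but not shown; a hedged sketch of how this function supports kill-and-resume operation (each call adds exactly one trial because max_evals is recomputed from the loaded pickle):

if __name__ == "__main__":
    while True:
        run_a_trial()  # one more TPE evaluation per call, checkpointed to results.pkl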
def fit(self, X_train, y_train, X_test=None, y_test=None, n_iters=10, start_vals=None):
    """Run TPE search over the parameter space; return the best params and a model built from them."""
    if (X_test is None) and (y_test is None):
        X_test = X_train
        y_test = y_train
    elif (X_test is None) or (y_test is None):
        raise MissingValueException("Need to provide 'X_test' and 'y_test'")

    def objective(params):
        model_params = self.model.get_params()
        model_params.update(params)
        self.model = self.build_new_model(model_params)
        self.model.fit(X_train, y_train)
        y_pred = self.model.predict(X_test)
        y_true = y_test
        # Negate because hyperopt minimizes, while eval_func is a score to maximize.
        score = -self.eval_func(y_true, y_pred)
        return score

    self.trials = Trials()
    best_params = fmin(objective,
                       self.param_space,
                       algo=tpe.suggest,
                       max_evals=n_iters,
                       trials=self.trials)
    self.hyperparam_history = []
    for i, loss in enumerate(self.trials.losses()):
        param_vals = {k: v[i] for k, v in self.trials.vals.items()}
        self.hyperparam_history.append((-loss, param_vals))
    model_params = self.model.get_params()
    model_params.update(best_params)
    best_model = self.build_new_model(model_params)
    return best_params, best_model
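A hypothetical usage sketch; the enclosing class is not named in this excerpt, so the constructor call and estimator below are assumptions:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from hyperopt import hp

# tuner = SomeTuner(model=RandomForestClassifier(),          # hypothetical class name
#                   param_space={'max_depth': hp.choice('max_depth', range(2, 12))},
#                   eval_func=accuracy_score)
# best_params, best_model = tuner.fit(X_train, y_train, n_iters=25)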
def test_simple():
    X_train, Y_train, X_test, Y_test = data()
    best_run, best_model = optim.minimize(model=model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=1,
                                          trials=Trials(),
                                          verbose=False)

def test_ensemble():
    X_train, X_test, Y_train, Y_test = data()
    optim.best_ensemble(nb_ensemble_models=2,
                        model=model,
                        data=data,
                        algo=rand.suggest,
                        max_evals=1,
                        trials=Trials(),
                        voting='hard')
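Both tests rely on hyperas's contract: data() returns the train/test arrays, and model() builds and trains a Keras model whose hyperparameters are written as {{...}} templates that hyperas substitutes per trial (so the function only runs through optim.minimize, not as plain Python). A hedged, illustrative sketch of that contract, not the test suite's actual model:

from hyperopt import STATUS_OK

def model(X_train, Y_train, X_test, Y_test):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    m = Sequential()
    m.add(Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
    m.add(Dropout({{uniform(0, 1)}}))  # hyperas template, filled in per trial
    m.add(Dense(Y_train.shape[1], activation='softmax'))
    m.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    m.fit(X_train, Y_train, epochs=1, verbose=0)
    score = m.evaluate(X_test, Y_test, verbose=0)
    return {'loss': -score[1], 'status': STATUS_OK, 'model': m}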
dsb_create_voxel_model_predictions.py — project: data-science-bowl-2017, author: tondonia
def optimize(self):
    trials = Trials()
    print('Tuning Parameters')
    best = fmin(self.score, self.h_param_grid, algo=tpe.suggest, trials=trials, max_evals=200)
    print('\n\nBest Scoring Value')
    print(best)
    self.change_to_int(best, self.to_int_params)
    self.level0.set_params(**best)
    self.level0.fit(self.trainX, self.trainY)
    joblib.dump(self.level0, 'model_best.pkl', compress=True)
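change_to_int is not shown in this listing; since hp.quniform samples arrive as floats, a plausible implementation (an assumption, not the project's actual helper) simply casts the listed keys:

def change_to_int(params, to_int_params):
    # Cast quniform-sampled floats (e.g. depths, tree counts) back to ints in place.
    for key in to_int_params:
        params[key] = int(params[key])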
def run_all_gbm(csvfile=saving_fp,
                space=[hp.quniform('ntrees', 200, 750, 1),
                       hp.quniform('max_depth', 5, 15, 1),
                       hp.uniform('learn_rate', 0.03, 0.35)]):
    # The search space is a stochastic argument-sampling program:
    start_save(csvfile=csvfile)
    trials = Trials()
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=evals,
                trials=trials)
    print(best)
    # from hyperopt import space_eval
    # print(space_eval(space, best))
    # trials.trials   # list of dictionaries representing everything about the search
    # trials.results  # list of dictionaries returned by 'objective' during the search
    print(trials.losses())  # list of losses (float for each 'ok' trial)
    # trials.statuses()  # list of status strings
    # Binary pickle protocol requires 'wb' mode.
    with open('output/gbmbest.pkl', 'wb') as output:
        pickle.dump(best, output, -1)
    with open('output/gbmtrials.pkl', 'wb') as output:
        pickle.dump(trials, output, -1)
    # To reload later:
    # with open('output/gbmtrials.pkl', 'rb') as input:
    #     trials = pickle.load(input)
    # with open('output/gbmbest.pkl', 'rb') as input:
    #     best = pickle.load(input)
def run(self):
    start = time.time()
    trials = Trials()
    best = fmin(self._obj, self.model_param_space._build_space(), tpe.suggest, self.max_evals, trials)
    best_params = space_eval(self.model_param_space._build_space(), best)
    best_params = self.model_param_space._convert_int_param(best_params)
    trial_rmses = np.asarray(trials.losses(), dtype=float)
    best_ind = np.argmin(trial_rmses)
    best_rmse_mean = trial_rmses[best_ind]
    best_rmse_std = trials.trial_attachments(trials.trials[best_ind])["std"]
    self.logger.info("-" * 50)
    self.logger.info("Best RMSE")
    self.logger.info("      Mean: %.6f" % best_rmse_mean)
    self.logger.info("      Std: %.6f" % best_rmse_std)
    self.logger.info("Best param")
    self.task._print_param_dict(best_params)
    end = time.time()
    _sec = end - start
    _min = int(_sec / 60.)
    self.logger.info("Time")
    if _min > 0:
        self.logger.info("      %d mins" % _min)
    else:
        self.logger.info("      %d secs" % _sec)
    self.logger.info("-" * 50)
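The "std" attachment read above must have been stored by the objective: hyperopt lets an objective return an 'attachments' dict alongside the loss. A hedged sketch of that pattern (_run_cv is a hypothetical helper; the real _obj is not shown in this listing):

from hyperopt import STATUS_OK

def _obj(self, param_dict):
    rmse_mean, rmse_std = self._run_cv(param_dict)  # hypothetical CV helper
    return {
        'loss': rmse_mean,
        'status': STATUS_OK,
        'attachments': {'std': rmse_std},
    }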
#------------------------ Main -------------------------
def xgb2(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    N_splits = 9
    N_seeds = 4
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
    dtrain = xgb.DMatrix(train2, y)

    def step_xgb(params):
        cv = xgb.cv(params=params,
                    dtrain=dtrain,
                    num_boost_round=10000,
                    early_stopping_rounds=100,
                    nfold=10,
                    seed=params['seed'])
        # Last row, first column: mean CV loss at the final boosting round.
        score = cv.iloc[len(cv) - 1, 0]
        print(cname, score, len(cv), params)
        return dict(loss=score, status=STATUS_OK)

    space_xgb = dict(
        max_depth=hp.choice('max_depth', range(2, 8)),
        subsample=hp.quniform('subsample', 0.6, 1, 0.05),
        colsample_bytree=hp.quniform('colsample_bytree', 0.6, 1, 0.05),
        learning_rate=hp.quniform('learning_rate', 0.005, 0.03, 0.005),
        min_child_weight=hp.quniform('min_child_weight', 1, 6, 1),
        gamma=hp.quniform('gamma', 0.5, 10, 0.05),
        objective='binary:logistic',
        eval_metric='logloss',
        seed=1,
        silent=1
    )
    trs = load_state(cname + '_trials')
    if trs is None:
        tr = Trials()
    else:
        tr, _ = trs
    if len(tr.trials) > 0:
        print('reusing %d trials, best was:' % (len(tr.trials)), space_eval(space_xgb, tr.argmin))
    # Add one trial per fmin call so state can be checkpointed between evaluations.
    for n in range(5):
        best = fmin(step_xgb, space_xgb, algo=tpe.suggest, max_evals=len(tr.trials) + 1, trials=tr)
        save_state(cname + '_trials', (tr, space_xgb))
    xgb_params = space_eval(space_xgb, best)
    print(xgb_params)
    xgb_common(train2, y, test2, v, z, N_seeds, N_splits, cname, xgb_params)
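load_state and save_state are project helpers not shown in this listing; a plausible pickle-backed sketch (an assumption, not the project's actual code) that makes the checkpoint-per-trial loop above work:

import os
import pickle

def save_state(name, obj):
    # Persist (trials, space) so the TPE search can resume after interruption.
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, -1)

def load_state(name):
    if not os.path.exists(name + '.pkl'):
        return None
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)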
# Two further, near-verbatim copies of xgb2 appeared here; they differ from the
# version above only in the number of checkpointed fmin rounds: range(15) and
# range(25) instead of range(5).
def run(self):
    trials = Trials()
    Writer.create_empty(self.fpath)
    Writer.append_line_list(self.fpath, [c for c in self.columns] + self.output_items)
    best = fmin(self.score, self.space, algo=tpe.suggest, trials=trials, max_evals=self.max_evals)
    print(best)
def optimize(self, hyper_param_iterations, old_trials=None):
    hp_info = self.model_runner.hyper_parameter_info()
    if old_trials is None:
        trials = Trials()
    else:
        trials = old_trials
    if hp_info.get('fixed') is True:
        # No search needed: evaluate the fixed settings once and record a
        # single pseudo-trial so downstream code sees a populated Trials object.
        del hp_info['fixed']
        result = self.model_runner.train_and_cv_error(self.features, hp_info)
        trials.trials.append({
            'result': result,
            'misc': {'tid': 0, 'vals': hp_info},
        })
        return trials, hp_info
    best_hps = fmin(self.to_minimize,
                    hp_info,
                    algo=tpe.suggest,
                    max_evals=hyper_param_iterations,
                    trials=trials)
    filtered_hps = {}
    for hp in self.model_runner.hyper_parameter_info():
        try:
            filtered_hps[hp] = best_hps[hp]
        except KeyError:
            filtered_hps[hp] = None
    return trials, filtered_hps
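Note that fmin stops once the Trials object holds max_evals entries, so warm-starting with old_trials only adds evaluations when the new budget exceeds what is already stored; a hedged usage sketch (opt is a hypothetical instance of this class):

# trials, hps = opt.optimize(20)                     # runs 20 evaluations
# trials, hps = opt.optimize(40, old_trials=trials)  # runs 20 more on top of them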
def xgb3(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    N_splits = 9
    N_seeds = 4
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
    dtrain = xgb.DMatrix(train2, y)

    def step_xgb(params):
        cv = xgb.cv(params=params,
                    dtrain=dtrain,
                    num_boost_round=10000,
                    early_stopping_rounds=100,
                    nfold=10,
                    seed=params['seed'])
        score = cv.iloc[len(cv) - 1, 0]
        print(cname, score, len(cv), params)
        return dict(loss=score, status=STATUS_OK)

    # Same pattern as xgb2, with a wider gamma range and L1 regularization (alpha) added.
    space_xgb = dict(
        max_depth=hp.choice('max_depth', range(2, 8)),
        subsample=hp.quniform('subsample', 0.6, 1, 0.05),
        colsample_bytree=hp.quniform('colsample_bytree', 0.6, 1, 0.05),
        learning_rate=hp.quniform('learning_rate', 0.005, 0.03, 0.005),
        min_child_weight=hp.quniform('min_child_weight', 1, 6, 1),
        gamma=hp.quniform('gamma', 0, 10, 0.05),
        alpha=hp.quniform('alpha', 0.0, 1, 0.0001),
        objective='binary:logistic',
        eval_metric='logloss',
        seed=1,
        silent=1
    )
    trs = load_state(cname + '_trials')
    if trs is None:
        tr = Trials()
    else:
        tr, _ = trs
    if len(tr.trials) > 0:
        print('reusing %d trials, best was:' % (len(tr.trials)), space_eval(space_xgb, tr.argmin))
    for n in range(25):
        best = fmin(step_xgb, space_xgb, algo=tpe.suggest, max_evals=len(tr.trials) + 1, trials=tr)
        save_state(cname + '_trials', (tr, space_xgb))
    xgb_params = space_eval(space_xgb, best)
    print(xgb_params)
    xgb_common(train2, y, test2, v, z, N_seeds, N_splits, cname, xgb_params)
def tune(self, train_X, train_y, test_X, max_evals=2500):
    self.train_X = train_X
    self.train_y = train_y.reshape(len(train_y),)
    self.test_X = test_X
    np.random.seed(0)
    trials = Trials()
    params = self.optimize(trials, max_evals=max_evals)
    # Average of best iteration 64.5
    # Score 0.6018852
    # best parameters {'colsample_bytree': 0.6000000000000001, 'min_child_weight': 7.0, 'subsample': 0.9, 'eta': 0.2, 'max_depth': 6.0, 'gamma': 0.9}
    # best parameters {'colsample_bytree': 0.55, 'learning_rate': 0.03,
    #                  'min_child_weight': 9.0, 'n_estimators': 580.0,
    #                  'subsample': 1.0, 'eta': 0.2, 'max_depth': 7.0, 'gamma': 0.75}
    # best params : 2
    # {'colsample_bytree': 0.45, 'eta': 0.2,
    #  'gamma': 0.9500000000000001, 'learning_rate': 0.04,
    #  'max_depth': 6.0, 'min_child_weight': 9.0,
    #  'n_estimators': 750.0, 'subsample': 1.84}
    # Adapt best params
    # params = {'objective': 'multi:softprob',
    #           'eval_metric': 'mlogloss',
    #           'colsample_bytree': 0.55,
    #           'min_child_weight': 9.0,
    #           'subsample': 1.0,
    #           'learning_rate': 0.03,
    #           'eta': 0.2,
    #           'max_depth': 7.0,
    #           'gamma': 0.75,
    #           'num_class': 2,
    #           'n_estimators': 580.0
    #           }
    params_result = self.score(params)
    # Training with params :
    # train-mlogloss:0.564660 eval-mlogloss:0.608842
    # Average of best iteration 32.0
    # Score 0.6000522
    return params, params_result