def _count_configurations(param_space):
    """Return the number of distinct settings in ``param_space``, or None if unbounded.

    ``param_space`` is either a dict mapping parameter names to their candidate
    values or a list of such dicts (the two shapes accepted as
    ``param_distributions``). A candidate that is a distribution (exposes
    ``rvs``) or has no length makes the space unbounded, reported as None.
    """
    grids = [param_space] if isinstance(param_space, dict) else param_space
    total = 0
    for grid in grids:
        size = 1
        for candidates in grid.values():
            if hasattr(candidates, 'rvs') or not hasattr(candidates, '__len__'):
                return None
            size *= len(candidates)
        total += size
    return total


def tune(insights, x_train, y_train, x_test, y_test, models='all', requirements=None, maximize=False):
    """Select a model family and run a randomized hyperparameter search on it.

    Args:
        insights: mapping indexed by the keys 'dt', 'lr' and 'rf'; each value is
            handed to ``check_requirements``.
        x_train: training features passed to ``RandomizedSearchCV.fit``.
        y_train: training labels passed to ``RandomizedSearchCV.fit`` and, when
            ``requirements`` is None, to ``requirements_bare_minimum``.
        x_test: currently unused.
        y_test: currently unused.
        models: which candidate set to consider: 'all', 'linear' or 'cheap'.
            Any other value selects no models.
        requirements: requirements passed to ``check_requirements``; defaults to
            ``requirements_bare_minimum(y_train)``.
        maximize: only gates the (currently disabled) early return for vanilla
            models that already satisfy the requirements.

    Returns:
        The fitted ``RandomizedSearchCV`` for ``LogisticRegression`` when that
        model is among the selected candidates, otherwise None.
    """
    if requirements is None:
        requirements = requirements_bare_minimum(y_train)

    # do vanilla models satisfy the requirements?
    # assuming decision tree is the most intuitive, then logistic regression and then random forest
    # TODO: extend this to metrics other than accuracy using the confusion matrix
    for model_name in ['dt', 'lr', 'rf']:
        if check_requirements(insights[model_name], requirements) and not maximize:
            # TODO: turn this back on
            # return model_name
            pass

    # model selection and tuning loop
    models_to_train = []
    if models == 'all':
        models_to_train += models_linear + models_nonlinear_cheap + models_nonlinear_expensive
    elif models == 'linear':
        # NOTE(review): 'linear' selects models_online rather than models_linear -- confirm this is intended.
        models_to_train += models_online
    elif models == 'cheap':
        # The original tested `models_to_train == 'cheap'`, which is never true for a list,
        # so this branch was unreachable.
        models_to_train += models_linear + models_nonlinear_cheap

    # TODO: using all of the training data, need to use less data if runtime for insights models is large (how large?)
    for model in models_to_train:
        # TODO: add the looping logic
        if model == LogisticRegression:
            # Cap n_iter at the size of the search space: sampling more iterations
            # than there are distinct configurations is wasted (or rejected) work.
            number_configurations = _count_configurations(hyperparameters[model])
            if number_configurations is None:
                random_search_iterations = random_search_iterations_max
            else:
                random_search_iterations = min(random_search_iterations_max, number_configurations)

            random_search = RandomizedSearchCV(
                model(n_jobs=-1, random_state=random_state),
                param_distributions=hyperparameters[model],
                n_iter=random_search_iterations,
                n_jobs=-1,
                random_state=0,
            )
            random_search.fit(x_train, y_train)
            # TODO: report the fit runtime and evaluation metrics (accuracy, per-fold accuracy,
            # test accuracy, confusion matrix, ROC curve / AUC on the test split) with the result.
            return random_search
    return None
# Web-page residue ("comment list" / "article table of contents"); not part of the code.