tuning.py source code

python

Project: menrva    Author: amirziai
# Library imports inferred from the calls below; project-level helpers such as
# requirements_bare_minimum, check_requirements, hyperparameters and the models_*
# lists are defined elsewhere in the menrva module.
from time import time

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV


def tune(insights, x_train, y_train, x_test, y_test, models='all', requirements=None, maximize=False):
    if requirements is None:
        requirements = requirements_bare_minimum(y_train)

    # do vanilla models satisfy the requirements?
    # assuming decision tree is the most intuitive, then logistic regression and then random forest
    # TODO: extend this to metrics other than accuracy using the confusion matrix
    for model_name in ['dt', 'lr', 'rf']:
        model_insights = insights[model_name]
        model_variation = np.std(model_insights['accuracy_folds'])  # fold-to-fold spread; not yet used below

        if check_requirements(model_insights, requirements) and not maximize:
            pass
            # TODO: turn this back on
            # return model_name

    # model selection and tuning loop
    models_to_train = []

    if models == 'all':
        models_to_train += models_linear + models_nonlinear_cheap + models_nonlinear_expensive
    elif models == 'linear':
        models_to_train += models_online
    elif models == 'cheap':
        models_to_train += models_linear + models_nonlinear_cheap

    # TODO: using all of the training data, need to use less data if runtime for insights models is large (how large?)
    for model in models_to_train:
        # TODO: add the looping logic
        if model == LogisticRegression:
            # total number of configurations in this model's search grid
            number_configurations = np.prod(np.array([len(values) for values in hyperparameters[model].values()]))
            random_search_iterations = np.min([random_search_iterations_max, number_configurations])
            random_search = RandomizedSearchCV(model(n_jobs=-1, random_state=random_state),
                                               param_distributions=hyperparameters[model],
                                               n_iter=random_search_iterations, n_jobs=-1, random_state=0)
            runtime = time()
            random_search.fit(x_train, y_train)
            runtime = time() - runtime

            info = dict()
            info['runtime'] = runtime
            # info['accuracy'] = min(scores)
            # info['accuracy_test'] = accuracy_score(y_test, y_test_predicted)
            # info['accuracy_folds'] = scores
            # info['confusion_matrix'] = confusion_matrix(y_test, y_test_predicted)
            # clf.fit(x_train, y_train)
            # fpr, tpr, _ = roc_curve(y_test, clf_predict_proba(clf, x_test))
            # info['fpr'] = fpr
            # info['tpr'] = tpr
            # info['auc'] = auc(fpr, tpr)

            return random_search

    return None
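
The function relies on module-level names (hyperparameters, models_linear, models_nonlinear_cheap, models_nonlinear_expensive, models_online, random_search_iterations_max, random_state, requirements_bare_minimum, check_requirements) that live elsewhere in the menrva project. Below is a minimal, illustrative sketch of how those names could be stubbed out to smoke-test tune() on toy data; every concrete value and helper implementation here is an assumption for demonstration, not the project's actual configuration.

# Hypothetical stubs for the module-level names used by tune(); values are illustrative only.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

random_state = 0
random_search_iterations_max = 20

models_linear = [LogisticRegression]
models_nonlinear_cheap = [DecisionTreeClassifier]
models_nonlinear_expensive = [RandomForestClassifier]
models_online = models_linear

# Search space keyed by model class, matching how hyperparameters[model] is indexed above.
hyperparameters = {
    LogisticRegression: {'C': [0.01, 0.1, 1.0, 10.0], 'penalty': ['l2']},
}

def requirements_bare_minimum(y_train):
    # assumed baseline: at least match the majority-class accuracy
    return {'accuracy': np.bincount(y_train).max() / len(y_train)}

def check_requirements(model_insights, requirements):
    # assumed check: minimum fold accuracy meets the required accuracy
    return model_insights['accuracy'] >= requirements['accuracy']

x, y = make_classification(n_samples=500, n_features=10, random_state=random_state)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=random_state)

# Fake per-model cross-validation insights in the shape the requirement check reads.
insights = {name: {'accuracy': 0.80, 'accuracy_folds': [0.78, 0.80, 0.82]}
            for name in ['dt', 'lr', 'rf']}

search = tune(insights, x_train, y_train, x_test, y_test, models='all')
if search is not None:
    print(search.best_params_, search.best_score_)

Because tune() looks these names up at call time, defining stubs like this in the same module before calling it is enough for a quick end-to-end check.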