applications.peptide.binding.py 文件源码-python代码片段

def cross_validation_spectrum_kernel(seq, is_train, folds):
    # Cross-validation
    # Repeat for each combination of candidate hyperparameter values
    substring_length = 3
    C_values = [0.01, 0.1, 1.0, 10., 100.]

    best_result = {"score": -np.infty}

    seq_vec = sequences_to_vec(seq, substring_length)
    K = np.dot(seq_vec, seq_vec.T)
    K_train = K[is_train][:, is_train]
    K_test = K[~is_train][:, is_train]
    y_train = labels[is_train]
    y_test = labels[~is_train]

    for C in C_values:

        print "Parameters: C: {0:.4f}".format(C)

        # Cross-validation
        fold_scores = []
        for fold in np.unique(folds):
            print "...Fold {0:d}".format(fold + 1)
            fold_K_train = K_train[folds != fold][:, folds != fold]
            fold_K_test = K_train[folds == fold][:, folds != fold]
            fold_y_train = y_train[folds != fold]
            fold_y_test = y_train[folds == fold]
            fold_estimator = SupportVectorRegression(kernel="precomputed", C=C)
            fold_estimator.fit(fold_K_train.copy(), fold_y_train)
            fold_scores.append(fold_estimator.score(fold_K_test, fold_y_test))
        cv_score = np.mean(fold_scores)
        print "...... cv score:", cv_score

        if cv_score > best_result["score"]:
            best_result["score"] = cv_score
            best_result["K"] = dict(train=K_train, test=K_test, full=K)
            best_result["estimator"] = SupportVectorRegression(kernel="precomputed", C=C).fit(K_train, y_train)
            best_result["hps"] = dict(C=C)

        print
        print
    return best_result