learning_rfc.py 文件源码-python代码片段

def loo_proba(x, y, clf_used='rf', use_pca=False, params=None):
    """Perform leave-one-out

    Parameters
    ----------
    x : np.array
        features
    y : np.array
        labels
    clf_used : str
        classifier
    use_pca : bool
        perform principal component analysis on features x in advance
    params : dict
        parameter for classifier

    Returns
    -------
    np.array, np.array
        class probability, hard classification
    """
    # print "Performing LOO with %s and %d features. Using PCA: %s" % \
    #       (clf_used, x.shape[1], str(use_pca))
    if use_pca:
        old_dim = x.shape[1]
        pca = PCA(n_components=0.999)
        x = pca.fit_transform(x)
        # print pca.explained_variance_ratio_
        # print "Reduced feature space dimension %d, instead of %d" % (x.shape[1],
        #                                                              old_dim)
    nans_in_X = np.sum(np.isnan(x))
    if nans_in_X > 0:
        # print np.where(np.isnan(x))
        # print "Found %d nans in features, converting to number." % nans_in_X
        x = np.nan_to_num(x)
    loo = cross_validation.LeaveOneOut(len(x))
    shape = (len(x), len(list(set(y))))
    prob = np.zeros(shape, dtype=np.float)
    pred = np.zeros((len(x), 1), dtype=np.int)
    cnt = 0
    # print "rf params:", rf_params
    for train_ixs, test_ixs in loo:
        x_train = x[train_ixs]
        x_test = x[test_ixs]
        y_train = y[train_ixs]
        clf = init_clf(clf_used, params)
        clf.fit(x_train, y_train)
        prob[cnt] = clf.predict_proba(x_test)
        pred[cnt] = clf.predict(x_test)
        np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
        # if pred[cnt] == y[test_ixs]:
        #     print test_ixs, "\t", prob[cnt], pred[cnt], y[test_ixs]
        # else:
        #     print test_ixs, "\t", prob[cnt], pred[cnt], y[test_ixs], "\t WRONG"
        cnt += 1
    return prob, pred