python类precision_recall_curve()的实例源码-面圈网

mainPEP.py 文件源码项目：PEP 作者: ma-compbio 项目源码文件源码阅读 28 收藏 0 点赞 0 评论 0

def threshold_estimate(x,y):
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(x, y, test_size=0.1, random_state=0)
    weight = float(len(y_train[y_train == 0]))/float(len(y_train[y_train == 1]))
    w1 = np.array([1]*y_train.shape[0])
    w1[y_train==1]=weight
    print("samples: %d %d %f" % (x_train.shape[0], x_test.shape[0], weight))
    estimator = xgb.XGBClassifier(max_depth=10, learning_rate=0.1, n_estimators=1000, nthread=50)
    estimator.fit(x_train, y_train, sample_weight=w1)
    y_scores = estimator.predict_proba(x_test)[:,1]
    precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
    f1 = 2*precision[2:]*recall[2:]/(precision[2:]+recall[2:])
    m_idx = np.argmax(f1)
    m_thresh = thresholds[2+m_idx]
    print("%d %f %f" % (precision.shape[0], f1[m_idx], m_thresh))
    return m_thresh

# Estimate threshold for the classifier using inner-round cross validation

generic_classifier.py 文件源码项目：2020plus 作者: KarchinLab 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def _update_tsg_metrics(self, y_true, y_pred, prob):
        self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
        self.tsg_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.tsg_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        tsg_col = 1  # column for metrics relate to tsg
        self.tsg_precision[self.num_pred] = prec[tsg_col]
        self.tsg_recall[self.num_pred] = recall[tsg_col]
        self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
        self.logger.debug('Tsg Iter %d: Precission=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
        #self.tsg_tpr_array[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p)

metrics.py 文件源码项目：qtim_ROP 作者: QTIM-Lab 项目源码文件源码阅读 18 收藏 0 点赞 0 评论 0

def plot_PR_by_class(y_pred, y_true, classes, out_path):

    best_thresh = {}
    for class_name, c in classes.items():  # for each class

        # Compute ROC curve
        precision, recall, thresholds = precision_recall_curve(y_true[:, c], y_pred[:, c])
        pr_auc = auc(recall, precision)

        # Plot PR curve
        plt.plot(recall, precision, label='{}, AUC = {:.3f}'.format(class_name, pr_auc))

        # Calculate J statistic
        J = [j_statistic(y_true, y_pred, t) for t in thresholds]
        j_best = np.argmax(J)

        # Store best threshold for each class
        best_thresh[class_name] = J[j_best]

    return best_thresh

knock79.py 文件源码项目：100knock2017 作者: tmu-nlp 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def cv(feature_dict, feature, polarity, folds):
    kfold = KFold(len(polarity), n_folds = folds)
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [(feature[i]) for i in train]
        y = [(polarity[i])for i in train]
        LR.fit(scipy.sparse.vstack(x), (y))

        test_label = []
        answer_label = [(polarity[j]) for j in test]
        for j in test:
            query = feature[j]
            result = -1 if query.shape[1] != len(feature_dict) else predict(LR, query)
            test_label.append(result[1][1])
        pre, rec, thr = precision_recall_curve(answer_label, test_label)
        return pre, rec, thr
    return accuracy, precision, recall, f1

distributions.py 文件源码项目：hco-experiments 作者: zooniverse 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def sklearn_purity_completeness(score_export):
    golds, probs = zip(*score_export.roc())
    golds = np.array(golds)
    probs = np.array(probs)

    purity, completeness, _ = precision_recall_curve(golds, probs)

    plt.clf()
    plt.plot(completeness, purity, lw=2, color='navy',
             label='Precision-Recall curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    # plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
    plt.legend(loc="lower left")
    # plt.show()

test_ranking.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def test_precision_recall_curve():
    y_true, _, probas_pred = make_prediction(binary=True)
    _test_precision_recall_curve(y_true, probas_pred)

    # Use {-1, 1} for labels; make sure original labels aren't modified
    y_true[np.where(y_true == 0)] = -1
    y_true_copy = y_true.copy()
    _test_precision_recall_curve(y_true, probas_pred)
    assert_array_equal(y_true_copy, y_true)

    labels = [1, 0, 0, 1]
    predict_probas = [1, 2, 3, 4]
    p, r, t = precision_recall_curve(labels, predict_probas)
    assert_array_almost_equal(p, np.array([0.5, 0.33333333, 0.5, 1., 1.]))
    assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.]))
    assert_array_almost_equal(t, np.array([1, 2, 3, 4]))
    assert_equal(p.size, r.size)
    assert_equal(p.size, t.size + 1)

test_ranking.py 文件源码项目：Parallel-SGD 作者: angadgill 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def _test_precision_recall_curve(y_true, probas_pred):
    # Test Precision-Recall and aread under PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, 1)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true,
                                              np.zeros_like(probas_pred))
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.75, 3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)

task8.py 文件源码项目：Machine-Learning 作者: elluscinia 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def drawGraphsPeriod(data, start, end, date):
    '''
    ??????? ?????? ?????? ????????-??????? ?? ??????
    :param data: ?????? ??
    :param start: ?????? ?????
    :param end: ????? ?????
    :param date: ????
    :param return: ?????? ?? ??????????
    '''
    plt.clf()

    for i in xrange(3, 4):
        actual, predictions = getData(list(data['p' + str(i) + '_Fraud'][start:end]), list(data['CLASS'][start:end]))

        precision, recall, thresholds = precision_recall_curve(actual, predictions)

        plt.plot(recall, precision, label='%s PRC' % ('p' + str(i) + '_Fraud'))

    plt.title('Precision-recall curve for ' + str((date - datetime.timedelta(days=1)).strftime('%Y/%m/%d')))
    plt.legend(loc='lower right', fontsize='small')
    plt.xlim([0.0,1.0])
    plt.ylim([0.0,1.0])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.show()

recognition_utils.py 文件源码项目：pybot 作者: spillai 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def multilabel_precision_recall(y_score, y_test, clf_target_ids, clf_target_names): 
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import average_precision_score
    from sklearn.preprocessing import label_binarize

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()

    # Find indices that have non-zero detections
    clf_target_map = { k: v for k,v in zip(clf_target_ids, clf_target_names)}
    id2ind = {tid: idx for (idx,tid) in enumerate(clf_target_ids)}

    # Only handle the targets encountered
    unique = np.unique(y_test)
    nzinds = np.int64([id2ind[target] for target in unique])

    # Binarize and create precision-recall curves
    y_test_multi = label_binarize(y_test, classes=unique)
    for i,target in enumerate(unique):
        index = id2ind[target]
        name = clf_target_map[target]
        precision[name], recall[name], _ = precision_recall_curve(y_test_multi[:, i],
                                                                  y_score[:, index])
        average_precision[name] = average_precision_score(y_test_multi[:, i], y_score[:, index])

    # Compute micro-average ROC curve and ROC area
    precision["average"], recall["average"], _ = precision_recall_curve(y_test_multi.ravel(),
                                                                        y_score[:,nzinds].ravel())
    average_precision["micro"] = average_precision_score(y_test_multi, y_score[:,nzinds],
                                                         average="micro") 
    average_precision["macro"] = average_precision_score(y_test_multi, y_score[:,nzinds],
                                                         average="macro") 
    return precision, recall, average_precision

classify.py 文件源码项目：ISM2017 作者: ybayle 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def plot_precision_recall(indir, gts_file, outdir):
    groundtruths = read_item_tag(gts_file)
    plt.figure(1)

    indir = utils.abs_path_dir(indir)
    for item in os.listdir(indir):
        if ".csv" in item:
            isrcs = read_preds(indir + "/" + item)
            test_groundtruths = []
            predictions = []
            for isrc in isrcs:
                if isrc in groundtruths:
                    test_groundtruths.append(groundtruths[isrc])
                    predictions.append(isrcs[isrc])
            test_groundtruths = [tag=="s" for tag in test_groundtruths]
            precision, recall, _ = precision_recall_curve(test_groundtruths, predictions)
            plt.plot(recall, precision, label=item[:-4] + " (" + str(round(average_precision_score(test_groundtruths, predictions), 3)) + ")")

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.title('Precision-Recall curve for Algo (AUC)')
    plt.legend(loc='best')
    plt.savefig(outdir + "precision_recall.png", dpi=200, bbox_inches="tight")
    # plt.show()
    plt.close()
    utils.print_success("Precision-Recall curve created in " + outdir)

util.py 文件源码项目：code-uai16 作者: thanhan 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def plot_pr(gold, predicted_prob, lb):
    pp1 = predicted_prob[:,1] # prob for class 1
    p, r, th = precision_recall_curve(gold, pp1)
    ap = average_precision_score(gold, pp1)
    plt.plot(r, p, label= lb + ' (area = {0:0.2f})'
                   ''.format(ap))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision and Recall')
    plt.legend(loc="upper right")
    #plt.show()

1.py 文件源码项目：sentiment-analysis 作者: lplping 项目源码文件源码阅读 37 收藏 0 点赞 0 评论 0

def Precision(clf):
    doc_class_predicted = clf.predict(x_test) 
    print(np.mean(doc_class_predicted == y_test))#?????????
    #???????  
    precision, recall, thresholds = precision_recall_curve(y_test, clf.predict(x_test))  
    answer = clf.predict_proba(x_test)[:,1]  
    report = answer > 0.5  
    print(classification_report(y_test, report, target_names = ['neg', 'pos']))
    print("--------------------")
    from sklearn.metrics import accuracy_score
    print('???: %.2f' % accuracy_score(y_test, doc_class_predicted))

evaluator.py 文件源码项目：bionlp17 作者: leebird 项目源码文件源码阅读 31 收藏 0 点赞 0 评论 0

def generate_prec_recall_points(clf, test_examples, test_labels, pk_file):
    # Generate precision-recall points and store in a pickle file.

    precision = dict()
    recall = dict()
    average_precision = dict()
    thresholds = dict()

    n_classes = len(clf.model.classes_)
    y_test = label_binarize(test_labels, clf.model.classes_)

    y_score = clf.predict_raw_prob(test_examples)
    # It only output 1 column of positive probability.
    y_score = y_score[:, 1:]

    for i in range(n_classes - 1):
        precision[i], recall[i], thresholds[i] = precision_recall_curve(
            y_test[:, i],
            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i],
                                                       y_score[:, i])
    # Compute micro-average ROC curve and ROC area
    precision["micro"], recall["micro"], thresholds['micro'] = \
        precision_recall_curve(y_test.ravel(), y_score.ravel())
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    if pk_file is not None:
        with open(pk_file, 'wb') as f:
            pickle.dump((precision, recall, average_precision, thresholds), f)

davoli_performance.py 文件源码项目：2020plus 作者: KarchinLab 项目源码文件源码阅读 80 收藏 0 点赞 0 评论 0

def calc_pr_metrics(truth_df, score_df):
    recall_array = np.linspace(0, 1, 100)
    p, r, thresh = metrics.precision_recall_curve(truth_df, score_df)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    thresh = np.insert(thresh, 0, 1.0)
    precision_array = interp(recall_array, r, p)
    threshold_array = interp(recall_array, r, thresh)
    pr_auc = metrics.auc(recall_array, precision_array)
    return precision_array, recall_array, pr_auc

pr_curve.py 文件源码项目：2020plus 作者: KarchinLab 项目源码文件源码阅读 16 收藏 0 点赞 0 评论 0

def calc_pr_metrics(truth_df, score_df):
    recall_array = np.linspace(0, 1, 100)
    p, r, thresh = metrics.precision_recall_curve(truth_df, score_df)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    thresh = np.insert(thresh, 0, 1.0)
    precision_array = interp(recall_array, r, p)
    threshold_array = interp(recall_array, r, thresh)
    pr_auc = metrics.auc(recall_array, precision_array)
    return precision_array, recall_array, pr_auc

generic_classifier.py 文件源码项目：2020plus 作者: KarchinLab 项目源码文件源码阅读 22 收藏 0 点赞 0 评论 0

def _update_metrics(self, y_true, y_pred,
                        onco_prob, tsg_prob):
        # record which genes were predicted what
        self.driver_gene_pred = pd.Series(y_pred, self.y.index)
        self.driver_gene_score = pd.Series(onco_prob+tsg_prob, self.y.index)

        # evaluate performance
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred,
                                                                                average='macro')
        cancer_gene_pred = ((onco_prob + tsg_prob)>.5).astype(int)
        self.cancer_gene_count[self.num_pred] = np.sum(cancer_gene_pred)
        self.precision[self.num_pred] = prec
        self.recall[self.num_pred] = recall
        self.f1_score[self.num_pred] = fscore

        # compute Precision-Recall curve metrics
        driver_prob = onco_prob + tsg_prob
        driver_true = (y_true > 0).astype(int)
        p, r, thresh = metrics.precision_recall_curve(driver_true, driver_prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        thresh = np.insert(thresh, 0, 1.0)
        self.driver_precision_array[self.num_pred, :] = interp(self.driver_recall_array, r, p)
        self.driver_threshold_array[self.num_pred, :] = interp(self.driver_recall_array, r, thresh)

        # calculate prediction summary statistics
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(driver_true, cancer_gene_pred)
        self.driver_precision[self.num_pred] = prec[1]
        self.driver_recall[self.num_pred] = recall[1]

        # save driver metrics
        fpr, tpr, thresholds = metrics.roc_curve(driver_true, driver_prob)
        self.driver_tpr_array[self.num_pred, :] = interp(self.driver_fpr_array, fpr, tpr)

generic_classifier.py 文件源码项目：2020plus 作者: KarchinLab 项目源码文件源码阅读 23 收藏 0 点赞 0 评论 0

def _update_onco_metrics(self, y_true, y_pred, prob):
        self.onco_gene_pred = pd.Series(y_pred, self.y.index)
        self.onco_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.onco_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        self.onco_precision[self.num_pred] = prec[self.onco_num]
        self.onco_recall[self.num_pred] = recall[self.onco_num]
        self.onco_f1_score[self.num_pred] = fscore[self.onco_num]
        self.logger.debug('Onco Iter %d: Precission=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.onco_tpr_array[self.num_pred, :] = interp(self.onco_fpr_array, fpr, tpr)
        #self.onco_mean_tpr[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        thresh = np.insert(thresh, 0, 1.0)
        self.onco_precision_array[self.num_pred, :] = interp(self.onco_recall_array, r, p)
        self.onco_threshold_array[self.num_pred, :] = interp(self.onco_recall_array, r, thresh)

vwoptimize.py 文件源码项目：vwoptimize 作者: denik 项目源码文件源码阅读 29 收藏 0 点赞 0 评论 0

def recall_at_precision(*args, **kwargs):
    from sklearn.metrics import precision_recall_curve
    metric_param = kwargs.pop('metric_param')
    required_precision = _parse_number_or_fraction(metric_param)
    precision, recall, thresholds = precision_recall_curve(*args, **kwargs)

    for pr, r in izip(precision, recall):
        if pr >= required_precision:
            return r

auc_pr_roc.py 文件源码项目：script 作者: 9468305 项目源码文件源码阅读 20 收藏 0 点赞 0 评论 0

def auc_pr(real_csv, result_csv):
    '''??real.csv?result.csv??????PR???AUC?'''
    label, prob = load_label_prob(real_csv, result_csv)
    precision, recall, _thresholds = metrics.precision_recall_curve(label, prob)
    area = metrics.auc(recall, precision)
    #print(area)
    return area

evaluate.py 文件源码项目：relation_classification 作者: hxy8149989 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def save_prcurve(prob, answer, model_name, save_fn, use_neg=True):
    """
    save prc curve
    """
    if not use_neg:
        prob_dn = []
        ans_dn = []
        for p in prob:
            prob_dn.append(p[1:])
        for ans in answer:
            ans_dn.append(ans[1:])
        prob = np.reshape(np.array(prob_dn), (-1))
        ans = np.reshape(np.array(ans_dn), (-1))
    else:
        prob = np.reshape(prob, (-1))
        ans = np.reshape(answer, (-1))

    precision, recall, threshold = precision_recall_curve(ans, prob)
    average_precision = average_precision_score(ans, prob)

    plt.clf()
    plt.plot(recall[:], precision[:], lw=2, color='navy', label=model_name)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    # plt.ylim([0.3, 1.0])
    # plt.xlim([0.0, 0.4])
    plt.title('Precision-Recall Area={0:0.2f}'.format(average_precision))
    plt.legend(loc="upper right")
    plt.grid(True)
    plt.savefig(save_fn)

mainPEP.py 文件源码项目：PEP 作者: ma-compbio 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def threshold_estimate_cv(x,y,k_fold):
    print "%d %d %d" % (y.shape[0], sum(y==1), sum(y==0))
    kf1 = StratifiedKFold(y, n_folds=k_fold, shuffle=True, random_state=0)
    threshold = np.zeros((k_fold),dtype="float32")
    cnt = 0
    for train_index, test_index in kf1:
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        w1 = np.array([1]*y_train.shape[0])
        weight = float(len(y_train[y_train == 0]))/float(len(y_train[y_train == 1]))
        w1 = np.array([1]*y_train.shape[0])
        w1[y_train==1]=weight

        estimator = xgb.XGBClassifier(max_depth=10, learning_rate=0.1, n_estimators=1000, nthread=50)
        estimator.fit(x_train, y_train, sample_weight=w1)
        y_scores = estimator.predict_proba(x_test)[:,1]
        precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
        f1 = 2*precision[2:]*recall[2:]/(precision[2:]+recall[2:])
        m_idx = np.argmax(f1)
        threshold[cnt] = thresholds[2+m_idx]
        cnt += 1
        print("%d %f %f" % (precision.shape[0], f1[m_idx], thresholds[2+m_idx]))
    return np.mean(threshold), threshold

# Cross validation using gradient tree boosting

base.py 文件源码项目：mappings-autogeneration 作者: dbpedia 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def scores(self, mdl):
        scores = mdl._scores(self.ss, self.ps, self.os)
        pr, rc, _ = precision_recall_curve(self.ys, scores)
        roc = roc_auc_score(self.ys, scores)
        return auc(rc, pr), roc

crat.py 文件源码项目：enhancement 作者: lwzswufe 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def classify(y, x, test_y, test_x):
    global data_df, factor_name, left, right, feature, ratio, threshold
    y_c = np.zeros(len(y))
    y_c[y > 0.02] = 1
    y_c[y < -0.02] = -1
    min_n = int(0.05 * len(y))
    clf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=min_n)
    clf.fit(x, y_c)
    y_p = clf.predict(x)
    fname = "D:\\Cache\\tree.txt"
    test_y = y
    with open(fname, 'w') as f:
        tree.export_graphviz(clf, out_file=f)
        f.close()
    factor_exchange(factor_name, fname)
    left = clf.tree_.children_left
    right = clf.tree_.children_right
    feature = clf.tree_.feature
    threshold = clf.tree_.threshold
    disp_tree()
    # precision, recall, thresholds = precision_recall_curve(y_c, clf.predict(x))
    '''''???????'''
    print("mean income is:", str(np.average(test_y)),
          "\nwin ratio is: ", str(np.sum(test_y > 0) / len(test_y)))
    print("after training\n"
          "mean class_1 is: ", str(np.average(test_y[y_p > 0])),
          "\nwin ratio is: ", str(np.sum(test_y[y_p > 0] > 0) / np.sum(y_p > 0)),
          "\ntotal class_1 is:", str(np.sum(np.sum(y_p > 0))),
          "\nmean class_0 is: ", str(np.average(test_y[y_p < 0])))

custom_classifiers.py 文件源码项目：emnlp2016 作者: stephenroller 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def fit(self, X, y):
        feature = X[:,0]
        p, r, t = precision_recall_curve(y, feature)
        #nonzero = (p > 0) & (r > 0)
        #p, r, t = p[nonzero], r[nonzero], t[nonzero[1:]]
        f1 = np.divide(2 * np.multiply(p, r), p + r)
        f1[np.isnan(f1)] = -1.0
        self.threshold_ = t[f1.argmax()]

dcpg_eval_perf.py 文件源码项目：deepcpg 作者: cangermueller 项目源码文件源码阅读 19 收藏 0 点赞 0 评论 0

def get_curve_fun(name):
    """Return performance curve function by its name."""
    if name == 'roc':
        return skm.roc_curve
    elif name == 'pr':
        return skm.precision_recall_curve
    else:
        raise ValueError('Invalid performance curve "%s"!' % name)

precision_recall.py 文件源码项目：scikit-viz 作者: fernavid 项目源码文件源码阅读 25 收藏 0 点赞 0 评论 0

def plot_precision_recall(y, y_pred, spacing=0.2):
    precision, recall, thresholds = precision_recall_curve(y, y_pred)
    roc_auc = auc(recall, precision)

    plt.figure(figsize=(10,10))
    plt.title('Precision vs Recall Curve', fontsize=18)
    plt.plot(recall, precision, 'b', label='AUC = %0.2f'% roc_auc)
    plt.legend(loc='lower right')
    plt.xlim([-0.1,1.2])
    plt.ylim([-0.1,1.2])
    plt.ylabel('Precision', fontsize=16)
    plt.xlabel('Recall', fontsize=16)

    acc = 0
    euc = spacing
    lx = 0 
    ly = 0
    for idx, t in enumerate(thresholds):
        if acc >= spacing or idx == len(thresholds)-1:
            plt.text(recall[idx], 
                     precision[idx], 
                     '%0.2f' % t, 
                     backgroundcolor='lightgray', 
                     color='black')
            acc = 0
        else:
            acc += euc

        euc = ((recall[idx] - lx)**2 + (precision[idx] - ly)**2)**0.5
        lx = recall[idx]
        ly = precision[idx]

    plt.show()

experiment.py 文件源码项目：kge-server 作者: vfrico 项目源码文件源码阅读 24 收藏 0 点赞 0 评论 0

def scores(self, mdl):
        scores = mdl._scores(self.ss, self.ps, self.os)
        pr, rc, _ = precision_recall_curve(self.ys, scores)
        roc = roc_auc_score(self.ys, scores)
        return auc(rc, pr), roc

model_eval.py 文件源码项目：healthcareai-py 作者: HealthCatalyst 项目源码文件源码阅读 27 收藏 0 点赞 0 评论 0

def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.

    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.

    Returns:
        dict: 

    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)

    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2

    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    pr_index = np.where(pr_distances == np.min(pr_distances))[0][0]
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]

    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds}

pipeline.py 文件源码项目：syracuse_public 作者: dssg 项目源码文件源码阅读 30 收藏 0 点赞 0 评论 0

def plot_precision_recall_n(y_true, y_prob, model_name, pdf=None):
    y_score = y_prob
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(
        y_true, y_score)
    precision_curve = precision_curve[:-1]
    recall_curve = recall_curve[:-1]
    pct_above_per_thresh = []
    number_scored = len(y_score)
    for value in pr_thresholds:
        num_above_thresh = len(y_score[y_score >= value])
        pct_above_thresh = num_above_thresh / float(number_scored)
        pct_above_per_thresh.append(pct_above_thresh)
    pct_above_per_thresh = np.array(pct_above_per_thresh)
    plt.clf()
    fig, ax1 = plt.subplots()
    ax1.plot(pct_above_per_thresh, precision_curve, 'b')
    ax1.set_xlabel('percent of population')
    ax1.set_ylabel('precision', color='b')
    ax2 = ax1.twinx()
    ax2.plot(pct_above_per_thresh, recall_curve, 'r')
    ax2.set_ylabel('recall', color='r')

    name = model_name
    plt.title(name)
    if pdf:
        pdf.savefig()
        plt.close()
    else:
        plt.show()

threshold.py 文件源码项目：tartarus 作者: sergiooramas 项目源码文件源码阅读 21 收藏 0 点赞 0 评论 0

def get_threshold(model_id):
    trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
    model_config = trained_models[trained_models["model_id"] == model_id]
    if model_config.empty:
        raise ValueError("Can't find the model %s in %s" %
                         (model_id, common.DEFAULT_TRAINED_MODELS_FILE))
    model_config = model_config.to_dict(orient="list")
    model_settings=eval(model_config['dataset_settings'][0])

    Y_test = np.load(common.DATASETS_DIR+'/item_factors_test_%s_%s_%s.npy' % (model_settings['fact'],model_settings['dim'],model_settings['dataset']))
    Y_pred = np.load(common.FACTORS_DIR+'/factors_%s.npy' % model_id)

    good_scores = Y_pred[Y_test==1]
    th = good_scores.mean()
    std = good_scores.std()
    print 'Mean th',th
    print 'Std',std

    p, r, thresholds = precision_recall_curve(Y_test.flatten(), Y_pred.flatten())
    f = np.nan_to_num((2 * (p*r) / (p+r)) * (p>r))
    print f
    max_f = np.argmax(f)
    fth = thresholds[max_f]
    print f[max_f],p[max_f],r[max_f]
    print 'F th %.2f' % fth
    plt.plot(r, p, 
             label='Precision-recall curve of class {0}')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.savefig("pr_curve.png")