Example source code for Python's roc_curve()
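
Before the per-project excerpts, here is a minimal, self-contained sketch of the sklearn.metrics.roc_curve API that every snippet below builds on (toy labels and scores, not taken from any of the projects):

# Minimal roc_curve usage sketch (toy data, illustrative only)
import numpy as np
from sklearn.metrics import roc_curve, auc

y_true = np.array([0, 0, 1, 1])            # binary ground-truth labels
y_score = np.array([0.1, 0.4, 0.35, 0.8])  # scores for the positive class
fpr, tpr, thresholds = roc_curve(y_true, y_score)
print(fpr)            # false positive rate at each threshold
print(tpr)            # true positive rate at each threshold
print(auc(fpr, tpr))  # area under the ROC curve: 0.75 for this toy data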

blend.py (project: melanoma-transfer, author: learningtitans)
def calc_auc(y_pred_proba, labels, exp_run_folder, classifier, fold):

    auc = roc_auc_score(labels, y_pred_proba)
    fpr, tpr, thresholds = roc_curve(labels, y_pred_proba)
    curve_roc = np.array([fpr, tpr])
    datafile_id = open(exp_run_folder+'/data/roc_{}_{}.txt'.format(classifier, fold), 'w+')
    np.savetxt(datafile_id, curve_roc)
    datafile_id.close()
    plt.plot(fpr, tpr, label='ROC curve: AUC={0:0.2f}'.format(auc))
    plt.xlabel('1-Specificity')
    plt.ylabel('Sensitivity')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('ROC Fold {}'.format(fold))
    plt.legend(loc="lower left")
    plt.savefig(exp_run_folder+'/data/roc_{}_{}.pdf'.format(classifier, fold), format='pdf')
    return auc
evaluation.py (project: fingerprint-securedrop, author: freedomofpress)
def plot_ROC(test_labels, test_predictions):
    fpr, tpr, thresholds = metrics.roc_curve(
        test_labels, test_predictions, pos_label=1)
    auc = "%.2f" % metrics.auc(fpr, tpr)
    title = 'ROC Curve, AUC = '+str(auc)
    with plt.style.context('ggplot'):
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr, "#000099", label='ROC curve')
        ax.plot([0, 1], [0, 1], 'k--', label='Baseline')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right')
        plt.title(title)
    return fig
MTMKL.py (project: PersonalizedMultitaskLearning, author: mitmedialab)
def getAUC(self, test_tasks):
        mean_tpr = 0.0
        mean_fpr = np.linspace(0, 1, 100)
        for t in range(self.n_tasks):
            X_t, Y_t = self.extractTaskData(self.train_tasks,t)
            X_test_t, Y_test_t = self.extractTaskData(test_tasks, t)

            overallKernel = self.constructKernelFunction(t)

            self.classifiers[t] = SVC(C=self.C, kernel=overallKernel, probability=True, max_iter=self.max_iter_internal, tol=self.tolerance)
            probas_ = self.classifiers[t].fit(X_t, Y_t).predict_proba(X_test_t)
            fpr, tpr, thresholds = roc_curve(Y_test_t, probas_[:, 1])

            mean_tpr += interp(mean_fpr, fpr, tpr)
            mean_tpr[0] = 0.0

        mean_tpr /= self.n_tasks
        mean_tpr[-1] = 1.0
        mean_auc = auc(mean_fpr, mean_tpr)

        return mean_auc, mean_fpr, mean_tpr
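Several entries in this listing (getAUC above, ROC.py and generic_classifier.py below) average ROC curves over tasks or folds by interpolating each curve onto a fixed FPR grid. A minimal standalone sketch of that pattern, with made-up fold data, using np.interp in place of the deprecated scipy interp:

# Macro-averaging ROC curves over folds on a common FPR grid (toy data)
import numpy as np
from sklearn.metrics import roc_curve, auc

folds = [  # (y_true, y_score) per fold, made-up values
    (np.array([0, 0, 1, 1]), np.array([0.2, 0.6, 0.4, 0.9])),
    (np.array([0, 1, 0, 1]), np.array([0.1, 0.8, 0.3, 0.7])),
]
mean_fpr = np.linspace(0, 1, 100)
mean_tpr = np.zeros_like(mean_fpr)
for y_true, y_score in folds:
    fpr, tpr, _ = roc_curve(y_true, y_score)
    mean_tpr += np.interp(mean_fpr, fpr, tpr)  # resample onto the shared grid
mean_tpr /= len(folds)
mean_tpr[0], mean_tpr[-1] = 0.0, 1.0  # pin the curve's endpoints
print(auc(mean_fpr, mean_tpr))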
pleiopred_main.py (project: PleioPred, author: yiminghu)
def pred_accuracy(y_true, y_pred):
    y_true = sp.copy(y_true)
    if len(sp.unique(y_true)) == 2:
        print('dichotomous trait, calculating AUC')
        y_min = y_true.min()
        y_max = y_true.max()
        if y_min != 0 or y_max != 1:
            y_true[y_true == y_min] = 0
            y_true[y_true == y_max] = 1
        fpr, tpr, thresholds = metrics.roc_curve(y_true, y_pred)
        auc = metrics.auc(fpr, tpr)
        return auc
    else:
        print('continuous trait, calculating COR')
        cor = sp.corrcoef(y_true, y_pred)[0, 1]
        return cor
noduleCADEvaluationLUNA16.py (project: cancer, author: yancz1989)
def computeFROC(FROCGTList, FROCProbList, totalNumberOfImages, excludeList):
  # Remove excluded candidates
  FROCGTList_local = []
  FROCProbList_local = []
  for i in range(len(excludeList)):
    if excludeList[i] == False:
      FROCGTList_local.append(FROCGTList[i])
      FROCProbList_local.append(FROCProbList[i])

  numberOfDetectedLesions = sum(FROCGTList_local)
  totalNumberOfLesions = sum(FROCGTList)
  totalNumberOfCandidates = len(FROCProbList_local)
  fpr, tpr, thresholds = skl_metrics.roc_curve(FROCGTList_local, FROCProbList_local)
  if sum(FROCGTList) == len(FROCGTList):  # Handle the border case where there are no false positives and ROC analysis gives NaN values.
    print("WARNING, this system has no false positives..")
    fps = np.zeros(len(fpr))
  else:
    fps = fpr * (totalNumberOfCandidates - numberOfDetectedLesions) / totalNumberOfImages
  sens = (tpr * numberOfDetectedLesions) / totalNumberOfLesions
  return fps, sens, thresholds
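The conversion above turns ROC coordinates into FROC coordinates: the false positive rate is scaled by the number of negative candidates and divided by the number of scans, giving false positives per scan, while sensitivity is rescaled so it counts all ground-truth lesions, including ones whose candidates were excluded. A quick numeric check with assumed counts:

# Hypothetical counts to illustrate the ROC-to-FROC rescaling above
totalNumberOfImages = 50
numberOfDetectedLesions = 80    # true lesions among the retained candidates
totalNumberOfLesions = 100      # lesions in the full ground truth
totalNumberOfCandidates = 1080  # retained candidates (80 positives + 1000 negatives)

fpr, tpr = 0.1, 0.9  # one operating point on the ROC curve
fps = fpr * (totalNumberOfCandidates - numberOfDetectedLesions) / totalNumberOfImages
sens = tpr * numberOfDetectedLesions / totalNumberOfLesions
print(fps)   # 2.0 false positives per scan
print(sens)  # 0.72 sensitivity over all ground-truth lesions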
generic_classifier.py (project: 2020plus, author: KarchinLab)
def _update_tsg_metrics(self, y_true, y_pred, prob):
        self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
        self.tsg_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.tsg_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        tsg_col = 1  # column for metrics related to TSG
        self.tsg_precision[self.num_pred] = prec[tsg_col]
        self.tsg_recall[self.num_pred] = recall[tsg_col]
        self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
        self.logger.debug('Tsg Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
        #self.tsg_tpr_array[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p)
ROC.py (project: SecuML, author: ANSSI-FR)
def addFold(self, fold_id, true_labels, predicted_proba, predicted_scores):
        if len(true_labels) == 0:
            return
        if self.probabilist_model:
            scores = predicted_proba
        else:
            scores = predicted_scores
        fpr, tpr, thresholds = roc_curve(true_labels, scores)
        self.mean_tpr += interp(self.mean_fpr, fpr, tpr)
        self.thresholds = interp(self.mean_fpr, fpr, thresholds)
        self.mean_tpr[0] = 0.0
        self.thresholds[0] = 1.0
        self.thresholds[-1] = 0.0
        roc_auc = auc(fpr, tpr)
        if self.num_folds > 1:
            self.ax1.plot(fpr, tpr, lw=1,
                          label='ROC fold %d (area = %0.2f)' % (fold_id, roc_auc))
        else:
            self.ax1.plot(fpr, tpr, lw=3,
                          color=colors_tools.getLabelColor('all'),
                          label='ROC (area = %0.2f)' % roc_auc)
noduleCADEvaluationLUNA16.py (project: luna16, author: gzuidhof)
def computeFROC(FROCGTList, FROCProbList, totalNumberOfImages, excludeList):
    # Remove excluded candidates
    FROCGTList_local = []
    FROCProbList_local = []
    for i in range(len(excludeList)):
        if excludeList[i] == False:
            FROCGTList_local.append(FROCGTList[i])
            FROCProbList_local.append(FROCProbList[i])

    numberOfDetectedLesions = sum(FROCGTList_local)
    totalNumberOfLesions = sum(FROCGTList)
    totalNumberOfCandidates = len(FROCProbList_local)
    fpr, tpr, thresholds = skl_metrics.roc_curve(FROCGTList_local, FROCProbList_local)
    if sum(FROCGTList) == len(FROCGTList):  # Handle the border case where there are no false positives and ROC analysis gives NaN values.
        print("WARNING, this system has no false positives..")
        fps = np.zeros(len(fpr))
    else:
        fps = fpr * (totalNumberOfCandidates - numberOfDetectedLesions) / totalNumberOfImages
    sens = (tpr * numberOfDetectedLesions) / totalNumberOfLesions
    return fps, sens, thresholds
reporter.py (project: postlearn, author: TomAugspurger)
def plot_roc_curve(y_true, y_score, ax=None):
    '''
    Plot the Receiver Operating Characteristic (ROC) curve, including the
    Area Under the Curve (AUC) score.

    Parameters
    ----------
    y_true : array
    y_score : array
    ax : matplotlib.axes, defaults to new axes

    Returns
    -------
    ax : matplotlib.axes
    '''
    if ax is None:
        ax = plt.axes()
    auc = metrics.roc_auc_score(y_true, y_score)
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    ax.plot(fpr, tpr)
    ax.annotate('AUC: {:.2f}'.format(auc), (.8, .2))
    ax.plot([0, 1], [0, 1], linestyle='--', color='k')
    return ax
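A short usage sketch for the helper above, with toy labels and scores assumed for illustration:

# Hypothetical usage of the plot_roc_curve helper defined above
import numpy as np
import matplotlib.pyplot as plt

y = np.array([0, 1, 0, 1, 1])
s = np.array([0.2, 0.7, 0.4, 0.9, 0.5])
fig, ax = plt.subplots()
plot_roc_curve(y, s, ax=ax)
fig.savefig('roc.png')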
metrics.py (project: adaware-nlp, author: mhw32)
def get_auc(outputs, probas):
    ''' AUC is a common metric for binary classification,
        computed by comparing true and false positive rates

        Args
        ----
        outputs : numpy array
                 true outcomes (OxTxN)

        probas : numpy array
                 predicted probabilities (OxTxN)

        Returns
        -------
        auc : float
    '''

    fpr, tpr, _ = roc_curve(outputs, probas[:, 1])
    return auc(fpr, tpr)
misc.py (project: mriqc, author: poldracklab)
def plot_roc_curve(true_y, prob_y, out_file=None):
    from sklearn.metrics import roc_curve

    fpr, tpr, _ = roc_curve(true_y, prob_y)

    fig = plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve')
    plt.plot([0, 1], [0, 1], color='navy', lw=1, linestyle='--')
    plt.xlim([-0.025, 1.025])
    plt.ylim([-0.025, 1.025])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    if out_file is not None:
        fig.savefig(out_file)
    return fig
asa.py (project: ar-embeddings, author: iamaziz)
def plot_auc(self, estimator, estimator_name, neg, pos):
        try:
            classifier_probas = estimator.decision_function(self.X_test)
        except AttributeError:
            classifier_probas = estimator.predict_proba(self.X_test)[:, 1]

        false_positive_r, true_positive_r, thresholds = metrics.roc_curve(self.y_test, classifier_probas)
        roc_auc = metrics.auc(false_positive_r, true_positive_r)

        label = '{:.1f}% neg:{} pos:{} {}'.format(roc_auc * 100, neg, pos, estimator_name)
        plt.plot(false_positive_r, true_positive_r, label=label)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([-0.05, 1.0])
        plt.ylim([0.0, 1.05])
        plt.title('ROC score(s)')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend(loc='lower right', prop={'size': 10})
        plt.savefig("ROC.png", dpi=300, bbox_inches='tight')
        plt.grid()
chrom_hmm_cnn.py (project: dsde-deep-learning, author: broadinstitute)
def get_fpr_tpr_roc(model, test_data, test_truth, labels):
    y_pred = model.predict(test_data, batch_size=32, verbose=0)

    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for k in labels.keys():
        cur_idx = labels[k]
        fpr[cur_idx], tpr[cur_idx], _ = roc_curve(test_truth[:, cur_idx], y_pred[:, cur_idx])
        roc_auc[cur_idx] = auc(fpr[cur_idx], tpr[cur_idx])

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(test_truth.ravel(), y_pred.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
    return fpr, tpr, roc_auc
noduleCADEvaluationLUNA16.py (project: kaggle_dsb, author: syagev)
def computeFROC(FROCGTList, FROCProbList, totalNumberOfImages, excludeList):
    # Remove excluded candidates
    FROCGTList_local = []
    FROCProbList_local = []
    for i in range(len(excludeList)):
        if excludeList[i] == False:
            FROCGTList_local.append(FROCGTList[i])
            FROCProbList_local.append(FROCProbList[i])

    numberOfDetectedLesions = sum(FROCGTList_local)
    totalNumberOfLesions = sum(FROCGTList)
    totalNumberOfCandidates = len(FROCProbList_local)
    fpr, tpr, thresholds = skl_metrics.roc_curve(FROCGTList_local, FROCProbList_local)
    if sum(FROCGTList) == len(FROCGTList):  # Handle the border case where there are no false positives and ROC analysis gives NaN values.
        print("WARNING, this system has no false positives..")
        fps = np.zeros(len(fpr))
    else:
        fps = fpr * (totalNumberOfCandidates - numberOfDetectedLesions) / totalNumberOfImages
    sens = (tpr * numberOfDetectedLesions) / totalNumberOfLesions
    return fps, sens, thresholds
test.py (project: EasyLSTM, author: cullengao)
def test_all_metrics(model, data=None, usage_ratio=1):
    if data is None:
        X_train, y_train, X_test, y_test = read_data(usage_ratio=usage_ratio)
    else:
        # You ought to use the same training & testing set from your initial input.
        X_train, y_train, X_test, y_test = data

    y_pred = model.predict_classes(X_test)
    y_ground = np.argmax(y_test, axis=1)
    # y_proba = model.predict_proba(X_test)

    # overall_acc = (y_pred == y_ground).sum() * 1. / y_pred.shape[0]
    precision = sk.metrics.precision_score(y_ground, y_pred)
    recall = sk.metrics.recall_score(y_ground, y_pred)
    f1_score = sk.metrics.f1_score(y_ground, y_pred)
    # confusion_matrix = sk.metrics.confusion_matrix(y_ground, y_pred)
    # fpr, tpr, thresholds = sk.metrics.roc_curve(y_ground, y_pred)

    print "precision_score = ", precision
    print "recall_score = ", recall
    print "f1_score = ", f1_score

    # plot_roc_curve(y_test, y_proba)
    plot_confusion_matrix(y_ground, y_pred)
metrics.py (project: qtim_ROP, author: QTIM-Lab)
def plot_ROC_by_class(y_true, y_pred, classes, ls='-'):

    print(y_true.shape)
    print(y_pred.shape)

    best_thresh = {}
    for class_name, c in classes.items():  # for each class

        # Compute ROC curve
        fpr, tpr, thresholds = roc_curve(y_true[:, c], y_pred[:, c])
        roc_auc = auc(fpr, tpr)

        # Plot ROC curve
        plt.plot(fpr, tpr, label='{}, AUC = {:.3f}'.format(class_name, roc_auc), linestyle=ls)

        # Calculate J statistic
        J = [j_statistic(y_true[:, c], y_pred[:, c], t) for t in thresholds]
        j_best = np.argmax(J)

        # Store best threshold for each class
        best_thresh[class_name] = thresholds[j_best]

    return best_thresh
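The j_statistic helper is not shown in this excerpt; it comes from the qtim_ROP project. Youden's J at a threshold t is sensitivity + specificity - 1, which equals tpr - fpr at the corresponding ROC point, so a plausible stand-in looks like this (an assumption, not the project's exact code):

# Plausible sketch of a j_statistic helper (Youden's J), assuming binary labels
import numpy as np

def j_statistic(y_true, y_score, t):
    pred = y_score >= t
    pos = y_true == 1
    sens = np.sum(pred & pos) / float(np.sum(pos))     # TPR at threshold t
    spec = np.sum(~pred & ~pos) / float(np.sum(~pos))  # TNR at threshold t
    return sens + spec - 1.0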
evaluate_ensemble.py (project: qtim_ROP, author: QTIM-Lab)
def plot_roc_auc(predictions, ground_truth, name=''):

    # Calculate ROC curve
    y_pred = np.asarray(predictions).ravel()
    y_true = np.asarray(ground_truth).ravel()

    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)

    # Plot
    plt.plot(fpr, tpr, label='{}, AUC = {:.3f}'.format(name, roc_auc))

    # # Return index of best model by J statistic
    # J = [j_statistic(y_true, y_pred, t) for t in thresholds]
    #
    # return thresholds[np.argmax(J)]  # TODO test this out!
model.py (project: fake-news-detection, author: aldengolab)
def print_roc(self, y_true, y_scores, filename):
        '''
        Prints the ROC for this model.
        '''
        fpr, tpr, thresholds = metrics.roc_curve(y_true, y_scores)
        plt.figure()
        plt.plot(fpr, tpr, color='darkorange', label='ROC curve (area = %0.2f)' % self.roc_auc)
        plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="lower right")
        plt.savefig(filename)
        plt.close()
utils.py (project: deepjets, author: deepjets)
def default_inv_roc_curve(Y_true, var, sample_weight=None):
    """Default ROC curve for a single variable.

    Args:
        Y_true: array of true classes (n*2).
        var: array of variable values.
        sample_weight: array of sample weights.
    Returns:
        Array of (signal efficiency, 1/[background efficiency]) pairs.
    """
    fpr, tpr, _ = roc_curve(Y_true[:, 0], var, sample_weight=sample_weight)
    print("AUC: {0:.4f}".format(auc(fpr, tpr, reorder=True)))
    res = 1./len(Y_true)
    return np.array([[tp, 1./max(fp, res)]
                     for tp,fp in zip(tpr,fpr)
                     if fp > 0.])
tutorial_helpers.py (project: ml_sampler, author: facebookincubator)
def plot_roc(y_test, y_pred, label=''):
    """Compute ROC curve and ROC area"""

    fpr, tpr, _ = roc_curve(y_test, y_pred)
    roc_auc = auc(fpr, tpr)

    # Plot of a ROC curve for a specific class
    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic' + label)
    plt.legend(loc="lower right")
    plt.show()
util.py (project: detecting-adversarial-samples, author: rfeinman)
def compute_roc(probs_neg, probs_pos, plot=False):
    """
    TODO
    :param probs_neg:
    :param probs_pos:
    :param plot:
    :return:
    """
    probs = np.concatenate((probs_neg, probs_pos))
    labels = np.concatenate((np.zeros_like(probs_neg), np.ones_like(probs_pos)))
    fpr, tpr, _ = roc_curve(labels, probs)
    auc_score = auc(fpr, tpr)
    if plot:
        plt.figure(figsize=(7, 6))
        plt.plot(fpr, tpr, color='blue',
                 label='ROC (AUC = %0.4f)' % auc_score)
        plt.legend(loc='lower right')
        plt.title("ROC Curve")
        plt.xlabel("FPR")
        plt.ylabel("TPR")
        plt.show()

    return fpr, tpr, auc_score
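A hypothetical call to compute_roc, with made-up detector scores where positive (adversarial) examples tend to score higher than negative (clean) ones:

# Hypothetical inputs for compute_roc (made-up scores)
import numpy as np

rng = np.random.RandomState(0)
probs_neg = rng.uniform(0.0, 0.6, size=100)  # clean examples score lower
probs_pos = rng.uniform(0.4, 1.0, size=100)  # adversarial examples score higher
fpr, tpr, auc_score = compute_roc(probs_neg, probs_pos, plot=False)
print('AUC: %0.4f' % auc_score)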
MLNPCapstone.py (project: machine-learning-nanodegree-program-capstone, author: harrylippy)
def plot_roc(self):

        for learner, clf in self._clf.items():
            # Make the predictions 
            (X_test, y_test) = self._test_data 
            y_pred = clf.predict(X_test)

            # Get (f)alse (p)ositive (r)ate, (t)rue (p)ositive (r)ate
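            # (note: hard predict() labels give only a two-point ROC curve;
            # predict_proba scores would trace the full curve)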
            fpr, tpr, _ = roc_curve(y_test, y_pred)

            # Add this classifier's results to the plot
            plt.plot(fpr, tpr, label='%s (area = %0.2f)'\
                % (learner, auc(fpr, tpr)))

        # Now do the plot
        # NOTE:  plot code stolen from scikit-learn docs (http://bit.ly/236k6M3)
        plt.xlim([-0.05, 1.05])
        plt.ylim([-0.05, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC)')
        plt.legend(loc="lower right")
        plt.show()
classify.py (project: hco-experiments, author: zooniverse)
def print_misclassified(y, pred, files, fom_func, threshold):

    #fpr, tpr, thresholds = roc_curve(y, pred)

    #fom = 0.01

    #FoMs.append(1-tpr[np.where(fpr<=FPR)[0][-1]])
    #FoM, threshold, fpr, tpr = fom_func(y, pred, fom)
    negatives = np.where(y==0)
    positives = np.where(y==1)

    falsePositives = files[negatives][np.where(pred[negatives]>threshold)]

    print "[+] False positives (%d):" % len(falsePositives)
    for i,falsePositive in enumerate(falsePositives):
        print "\t " + str(falsePositive), pred[negatives][np.where(pred[negatives]>threshold)][i]
    print
    missedDetections = files[positives][np.where(pred[positives]<=threshold)]
    print "[+] Missed Detections (%d):" % len(missedDetections)
    for i,missedDetection in enumerate(missedDetections):
        print "\t " + str(missedDetection), pred[positives][np.where(pred[positives]<=threshold)][i]
    print
models_siamese.py (project: gcn_metric_learning, author: sk1712)
def evaluate(self, data, labels, site, sess=None):
        """
        Runs one evaluation against the full epoch of data.
        Returns a summary string, the ROC AUC, the loss, and the scores.
        Batch evaluation saves memory and enables this to run on smaller GPUs.

        sess: the session in which the model has been trained.
        data: size N x M
            N: number of signals (samples)
            M: number of vertices (features)
        labels: size N
            N: number of signals (samples)
        """
        t_process, t_wall = time.process_time(), time.time()
        scores, loss = self.predict(data, labels, site, sess)

        fpr, tpr, _ = roc_curve(labels, scores)
        roc_auc = auc(fpr, tpr)

        string = 'samples: {:d}, AUC : {:.2f}, loss: {:.4e}'.format(len(labels), roc_auc, loss)

        if sess is None:
            string += '\ntime: {:.0f}s (wall {:.0f}s)'.format(time.process_time() - t_process, time.time() - t_wall)
        return string, roc_auc, loss, scores
insights.py (project: menrva, author: amirziai)
def clf_scores(clf, x_train, y_train, x_test, y_test):
    info = dict()

    # TODO: extend this to a confusion matrix per fold for more flexibility downstream (tuning)
    # TODO: calculate a set of ROC curves per fold instead of running it on test, currently introducing bias
    scores = cross_val_score(clf, x_train, y_train, cv=cv, n_jobs=-1)
    runtime = time()
    clf.fit(x_train, y_train)
    runtime = time() - runtime
    y_test_predicted = clf.predict(x_test)
    info['runtime'] = runtime
    info['accuracy'] = min(scores)
    info['accuracy_test'] = accuracy_score(y_test, y_test_predicted)
    info['accuracy_folds'] = scores
    info['confusion_matrix'] = confusion_matrix(y_test, y_test_predicted)
    clf.fit(x_train, y_train)
    fpr, tpr, _ = roc_curve(y_test, clf_predict_proba(clf, x_test))
    info['fpr'] = fpr
    info['tpr'] = tpr
    info['auc'] = auc(fpr, tpr)

    return info
test_ranking.py (project: Parallel-SGD, author: angadgill)
def test_roc_returns_consistency():
    # Test whether the returned threshold matches up with tpr
    # make small toy dataset
    y_true, _, probas_pred = make_prediction(binary=True)
    fpr, tpr, thresholds = roc_curve(y_true, probas_pred)

    # use the given thresholds to determine the tpr
    tpr_correct = []
    for t in thresholds:
        tp = np.sum((probas_pred >= t) & y_true)
        p = np.sum(y_true)
        tpr_correct.append(1.0 * tp / p)

    # compare tpr and tpr_correct to see if the thresholds' order was correct
    assert_array_almost_equal(tpr, tpr_correct, decimal=2)
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
test_ranking.py (project: Parallel-SGD, author: angadgill)
def test_roc_nonrepeating_thresholds():
    # Test to ensure that we don't return spurious repeating thresholds.
    # Duplicated thresholds can arise due to machine precision issues.
    dataset = datasets.load_digits()
    X = dataset['data']
    y = dataset['target']

    # This random forest classifier can only return probabilities
    # significant to two decimal places
    clf = ensemble.RandomForestClassifier(n_estimators=100, random_state=0)

    # How well can the classifier predict whether a digit is less than 5?
    # This task contributes floating point roundoff errors to the probabilities
    train, test = slice(None, None, 2), slice(1, None, 2)
    probas_pred = clf.fit(X[train], y[train]).predict_proba(X[test])
    y_score = probas_pred[:, :5].sum(axis=1)  # roundoff errors begin here
    y_true = [yy < 5 for yy in y[test]]

    # Check for repeating values in the thresholds
    fpr, tpr, thresholds = roc_curve(y_true, y_score, drop_intermediate=False)
    assert_equal(thresholds.size, np.unique(np.round(thresholds, 2)).size)
test_ranking.py (project: Parallel-SGD, author: angadgill)
def test_roc_curve_one_label():
    y_true = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # assert there are warnings
    w = UndefinedMetricWarning
    fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred)
    # all true labels, all fpr should be nan
    assert_array_equal(fpr,
                       np.nan * np.ones(len(thresholds)))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)

    # assert there are warnings
    fpr, tpr, thresholds = assert_warns(w, roc_curve,
                                        [1 - x for x in y_true],
                                        y_pred)
    # all negative labels, all tpr should be nan
    assert_array_equal(tpr,
                       np.nan * np.ones(len(thresholds)))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
utils.py (project: drnns-prediction, author: jvpoulos)
def plot_ROC(actual, predictions):
    # plot the FPR vs TPR and AUC for a two class problem (0,1)
    import matplotlib.pyplot as plt
    from sklearn.metrics import roc_curve, auc

    false_positive_rate, true_positive_rate, thresholds = roc_curve(actual, predictions)
    roc_auc = auc(false_positive_rate, true_positive_rate)

    plt.title('Receiver Operating Characteristic')
    plt.plot(false_positive_rate, true_positive_rate, 'b',
             label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.2])
    plt.ylim([-0.1, 1.2])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
metrics.py (project: data-preppy, author: gurgeh)
def metric(model, test_csv, fname):
    X, Y_true, headers = get_XY(test_csv)
    Y_pred = model.predict(X)
    try:
        print(confusion_matrix(Y_true, [a[0] > 0.5 for a in Y_pred]))
    except IndexError:
        print(confusion_matrix(Y_true, [a > 0.5 for a in Y_pred]))

    fpr, tpr, _ = roc_curve(Y_true, Y_pred)
    roc_auc = roc_auc_score(Y_true, Y_pred)

    plt.figure()
    plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC - %s' % fname.split('/')[-1])
    plt.legend(loc="lower right")
    plt.savefig(fname + ' - roc.png')  # save before show(): saving afterwards can write a blank figure
    plt.show()
    return plt

