import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, roc_curve


def calc_auc(y_pred_proba, labels, exp_run_folder, classifier, fold):
    auc = roc_auc_score(labels, y_pred_proba)
    fpr, tpr, thresholds = roc_curve(labels, y_pred_proba)
    curve_roc = np.array([fpr, tpr])
    datafile_id = open(exp_run_folder + '/data/roc_{}_{}.txt'.format(classifier, fold), 'w+')
    np.savetxt(datafile_id, curve_roc)
    datafile_id.close()
    plt.plot(fpr, tpr, label='ROC curve: AUC={0:0.2f}'.format(auc))
    plt.xlabel('1-Specificity')
    plt.ylabel('Sensitivity')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.grid(True)
    plt.title('ROC Fold {}'.format(fold))
    plt.legend(loc="lower left")
    plt.savefig(exp_run_folder + '/data/roc_{}_{}.pdf'.format(classifier, fold), format='pdf')
    return auc
Python roc_auc_score() usage examples (source code)
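For orientation, here is a minimal, self-contained sketch of the basic call the snippets below revolve around; the toy labels and scores are illustrative only and are not taken from any of the listed projects.

import numpy as np
from sklearn.metrics import roc_auc_score

# Toy binary ground truth and predicted scores (illustrative values only).
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])

# roc_auc_score takes the true labels first, then the scores/probabilities,
# and returns the area under the ROC curve (0.75 for this toy example).
print(roc_auc_score(y_true, y_score))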
def classification_metrics(y, y_pred, threshold):
    metrics = {}
    metrics['threshold'] = threshold_from_predictions(y, y_pred, 0)
    metrics['np.std(y_pred)'] = np.std(y_pred)
    metrics['positive_frac_batch'] = float(np.count_nonzero(y == True)) / len(y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    if denom > 0:
        metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
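A hedged usage sketch for the function above. threshold_from_predictions is a project-specific helper that is not included in this listing, so the stub below stands in for it purely for illustration; precision_score, recall_score and roc_auc_score come from sklearn.metrics.

import numpy as np
from sklearn.metrics import precision_score, recall_score, roc_auc_score

# Stub for the project helper that is not shown on this page (illustration only).
def threshold_from_predictions(y, y_pred, fpr_target):
    return 0.5

y = np.array([False, True, False, True, False, True])
y_pred = np.array([0.2, 0.9, 0.4, 0.7, 0.1, 0.8])
stats = classification_metrics(y, y_pred, threshold=0.5)
print(stats['auc'], stats.get('fpr'), stats.get('precision'))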
def auc_score(res_list):
    gp_list = np.array([])
    anno_list = np.array([])
    for res in res_list:
        g_pred = res.g_pred
        anno = res.annotation
        if g_pred.shape[-1] < anno.shape[-1]:
            anno = np.delete(anno, range(g_pred.shape[-1], anno.shape[-1]), axis=-1)
        elif g_pred.shape[-1] > anno.shape[-1]:
            g_pred = np.delete(g_pred, range(anno.shape[-1], g_pred.shape[-1]), axis=-1)
        gp_list = g_pred.T if len(gp_list) == 0 else np.append(gp_list, g_pred.T, axis=0)
        anno_list = anno.T if len(anno_list) == 0 else np.append(anno_list, anno.T, axis=0)

    assert(gp_list.shape == anno_list.shape)
    from sklearn.metrics import roc_auc_score
    class_auc = roc_auc_score(anno_list, gp_list, average=None)
    print('AUC of Classes:')
    print(class_auc)
    all_micro_auc = roc_auc_score(anno_list, gp_list, average='micro')
    print('Total micro AUC: {}'.format(all_micro_auc))
    all_macro_auc = roc_auc_score(anno_list, gp_list, average='macro')
    print('Total macro AUC: {}'.format(all_macro_auc))
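The snippet above reports per-class, micro- and macro-averaged AUC; below is a small standalone illustration of those three averaging modes on toy multi-label data (all values invented for the example).

import numpy as np
from sklearn.metrics import roc_auc_score

# Toy multi-label targets (n_samples x n_classes) and scores; every class has
# both positives and negatives, which roc_auc_score requires.
anno = np.array([[1, 0, 0],
                 [0, 1, 1],
                 [1, 1, 0],
                 [0, 0, 1]])
scores = np.array([[0.8, 0.2, 0.1],
                   [0.7, 0.6, 0.9],
                   [0.6, 0.8, 0.2],
                   [0.1, 0.4, 0.7]])

print(roc_auc_score(anno, scores, average=None))     # one AUC per class
print(roc_auc_score(anno, scores, average='micro'))  # pool all label/score pairs
print(roc_auc_score(anno, scores, average='macro'))  # unweighted mean of per-class AUCs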
def metrics(self, X, y):
    metrics = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:,1]  ## From softmax pair to prob of catastrophe
    metrics['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    metrics['threshold'] = threshold
    metrics['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    metrics['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        metrics['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if (any(y_pred_bool) and not all(y_pred_bool)):
            metrics['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            metrics['recall'] = recall_score(y, y_pred_bool)
    return metrics
def score(self, profiles, bin_sites):
    """Compute AUC ROC from predictions."""
    app_profiles = list()
    app_true_vals = list()
    for k, profile in profiles.iteritems():
        app_profiles.append(profile)
        true_vals = np.zeros(len(profile))
        bins = bin_sites.get(k, False)
        if bins is not False:
            for s, e, _ in bins:
                true_vals[s:e] = 1
        app_true_vals.append(true_vals)
    vec_profiles = np.concatenate(app_profiles)
    vec_true_vals = np.concatenate(app_true_vals)
    roc_auc = roc_auc_score(vec_true_vals, vec_profiles)
    return roc_auc
def getAccuracyAucOnAllTasks(self, task_list):
    all_task_Y = []
    all_preds = []
    for i in range(len(task_list)):
        preds, task_Y = self.getPredsTrueOnOneTask(task_list, i)
        if preds is None:
            # Skipping task because it does not have valid data
            continue
        if len(task_Y) > 0:
            all_task_Y.extend(task_Y)
            all_preds.extend(preds)
    if not helper.containsEachLabelType(all_preds):
        print "for some bizarre reason, the preds for all tasks are the same class"
        print "preds", all_preds
        print "true_y", all_task_Y
        auc = np.nan
    else:
        auc = roc_auc_score(all_task_Y, all_preds)
    acc = hblr.getBinaryAccuracy(all_preds, all_task_Y)
    return acc, auc
def getAccuracyAucOnOneTask(self, task_list, task, debug=False):
    X_t, y_t = self.extractTaskData(task_list, task)
    if len(X_t) == 0:
        return np.nan, np.nan
    preds = self.internal_predict(X_t, int(task))
    if debug:
        print "y_t:", y_t
        print "preds:", preds
    acc = helper.getBinaryAccuracy(preds, y_t)
    if len(y_t) > 1 and helper.containsEachSVMLabelType(y_t) and helper.containsEachSVMLabelType(preds):
        auc = roc_auc_score(y_t, preds)
    else:
        auc = np.nan
    return acc, auc
roc_auc.py — project: deep-mil-for-whole-mammogram-classification, author: wentaozhu
def on_epoch_end(self, epoch, logs={}):
    if epoch % self.interval == 0:
        y_pred = self.model.predict(self.X_val, verbose=0)
        #print(np.sum(y_pred[:,1]))
        #y_true = np.argmax(self.y_val, axis=1)
        #y_pred = np.argmax(y_pred, axis=1)
        #print(y_true.shape, y_pred.shape)
        if self.mymil:
            score = roc_auc_score(self.y_val.max(axis=1), y_pred.max(axis=1))
        else:
            score = roc_auc_score(self.y_val[:,1], y_pred[:,1])
        print("interval evaluation - epoch: {:d} - auc: {:.2f}".format(epoch, score))
        if score > self.auc:
            self.auc = score
            for f in os.listdir('./'):
                if f.startswith(self.filepath + 'auc'):
                    os.remove(f)
            self.model.save(self.filepath + 'auc' + str(score) + 'ep' + str(epoch) + '.hdf5')
roc_auc.py — project: deep-mil-for-whole-mammogram-classification, author: wentaozhu
def perform(self, node, inputs, output_storage):
    """
    Calculate ROC AUC score.

    Parameters
    ----------
    node : Apply instance
        Symbolic inputs and outputs.
    inputs : list
        Sequence of inputs.
    output_storage : list
        List of mutable 1-element lists.
    """
    if roc_auc_score is None:
        raise RuntimeError("Could not import from sklearn.")
    y_true, y_score = inputs
    try:
        roc_auc = roc_auc_score(y_true, y_score)
    except ValueError:
        roc_auc = np.nan
    #rvalue = np.array((roc_auc, prec, reca, f1))
    #[0][0]
    output_storage[0][0] = theano._asarray(roc_auc, dtype=config.floatX)
def setUp(self):
    os.putenv("KMP_DUPLICATE_LIB_OK", "TRUE")
    self.X_class, self.y_class = datasets.make_classification(random_state=42)
    self.X_reg, self.y_reg = datasets.make_regression(random_state=42)
    self.classification_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
    self.regression_optimizers = [XGBoostOptimizer, RandomForestOptimizer]
    self.class_scorer = Scorer("auc_error", lambda y_pred, y_true: 1 - metrics.roc_auc_score(y_pred, y_true))
    self.reg_scorer = Scorer("mse", metrics.mean_squared_error)
    self.classification_task_split = \
        Task("class_split", self.X_class, self.y_class, "classification", test_size=0.1, random_state=42)
    self.regression_task_split = \
        Task("reg_split", self.X_class, self.y_class, "regression", test_size=0.1, random_state=42)
    self.classification_task_cv = \
        Task("class_cv", self.X_reg, self.y_reg, "classification", cv=5, random_state=42)
    self.regression_task_cv = \
        Task("reg_cv", self.X_reg, self.y_reg, "regression", cv=5, random_state=42)
def classifier_accuracy_report(self, prediction_vector, threshold=0.5):
    """ Determine AUC and other metrics, write report.

    prediction_vector: vector of booleans (or outcome
    probabilities) of length n_subjects,
    e.g. self.point_predictions, self.ensemble_probabilities()...
    If this has dtype other than bool, prediction_vector > threshold
    is used for the confusion matrix.

    Returns: one string (multiple lines joined with \n, including
    trailing newline) containing a formatted report.
    """
    auc = roc_auc_score(self.model.data.y.astype(float), prediction_vector.astype(float))
    if not (prediction_vector.dtype == np.bool):
        prediction_vector = prediction_vector >= threshold
    conf = confusion_matrix(self.model.data.y, prediction_vector)
    lines = ['AUC: %.3f' % auc,
             'Confusion matrix: \n\t%s' % str(conf).replace('\n', '\n\t')]
    return '\n'.join(lines) + '\n'
########################################
# BAYES-FACTOR-BASED METHODS
def eval_semantics(scores, gt, args):
    from sklearn.metrics import roc_auc_score
    num_semantics = gt.shape[1]
    acc, auc = np.nan * np.zeros((num_semantics,)), np.nan * np.zeros((num_semantics,))
    if args.semantics == ATTRIBUTES:
        for s, (pred, lbl) in enumerate(zip(scores.T, gt.T)):
            acc[s] = (pred * (lbl - 0.5) > 0).astype(float).mean()
            if sum(lbl == 0) > 0 and sum(lbl == 1) > 0:
                auc[s] = roc_auc_score(lbl, pred)
    else:
        for s, (pred, lbl) in enumerate(zip(scores, gt.T)):
            acc[s] = (pred.argmax(axis=1) == lbl).astype(float).mean()
            onehot = np.zeros(pred.shape)
            for i, l in enumerate(lbl):
                onehot[i, int(l)] = 1
            if (onehot.sum(axis=0) == 0).sum() == 0:
                auc[s] = roc_auc_score(onehot, pred)
    return acc, auc
def test():
    y = []
    yp = []
    fi = open(sys.argv[1], 'r')
    for line in fi:
        data = ints(line.replace(":1", "").split())
        clk = data[1]
        mp = data[2]
        fsid = 3  # feature start id
        pred = 0.0
        for i in range(fsid, len(data)):
            feat = data[i]
            if feat in featWeight:
                pred += featWeight[feat]
        pred = sigmoid(pred)
        y.append(clk)
        yp.append(pred)
    fi.close()
    auc = roc_auc_score(y, yp)
    rmse = math.sqrt(mean_squared_error(y, yp))
    print str(round) + '\t' + str(auc) + '\t' + str(rmse)
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] / batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()
    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)
    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []
    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]
# ############################## Main program #################################
def plot_roc_curve(y_true, y_score, ax=None):
    '''
    Plot the Receiver Operating Characteristic curve, including the
    Area Under the Curve (AUC) score.

    Parameters
    ----------
    y_true : array
    y_score : array
    ax : matplotlib.axes, defaults to new axes

    Returns
    -------
    ax : matplotlib.axes
    '''
    ax = ax or plt.axes()
    auc = metrics.roc_auc_score(y_true, y_score)
    fpr, tpr, _ = metrics.roc_curve(y_true, y_score)
    ax.plot(fpr, tpr)
    ax.annotate('AUC: {:.2f}'.format(auc), (.8, .2))
    ax.plot([0, 1], [0, 1], linestyle='--', color='k')
    return ax
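A small usage sketch for the helper above; it assumes the module-level imports the snippet relies on (matplotlib.pyplot as plt and sklearn's metrics module), and the arrays are invented for the example.

import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics

y_true = np.array([0, 1, 1, 0, 1, 0])
y_score = np.array([0.2, 0.8, 0.6, 0.4, 0.9, 0.3])

ax = plot_roc_curve(y_true, y_score)  # a new axes is created when ax is None
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
plt.show()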
def cross_validate(classifier, n_folds=5):
    '''Custom cross-validation module I always use.'''
    train_X = classifier['train_X']
    train_y = classifier['train_y']
    model = classifier['model']
    score = 0.0
    skf = KFold(n_splits=n_folds)
    for train_index, test_index in skf.split(train_X):
        X_train, X_test = train_X[train_index], train_X[test_index]
        y_train, y_test = train_y[train_index], train_y[test_index]
        clf = model.fit(X_train, y_train)
        pred = clf.predict_proba(X_test)[:, 1]
        #print 'cross', roc_auc_score(y_test, pred)
        score = score + roc_auc_score(y_test, pred)
    return score / n_folds
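A usage sketch for the dict-based interface above, assuming KFold and roc_auc_score are imported at module level as the snippet implies; the dataset and model below are placeholders, not part of the original project.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

# Synthetic data and a probability-producing model, for illustration only.
X, y = make_classification(n_samples=300, random_state=0)
classifier = {'train_X': X, 'train_y': y, 'model': LogisticRegression()}
mean_auc = cross_validate(classifier, n_folds=5)
print('mean AUC over folds: {:.3f}'.format(mean_auc))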
def analyzeResult_temp(data, model, DataVecs):
    predict = model.predict(DataVecs)
    data['predict'] = predict
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                      average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except:
        print "ROC unavailable"
# Performance evaluation and result analysis using adjusted thresholds
def analyzeResult(data, model, DataVecs, threshold):
    predict = model.predict_proba(DataVecs)[:, 1]
    True, False = 1, 0
    data['predict'] = (predict > threshold)
    print ("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print ("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print ("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print ("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                      average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except:
        print "ROC unavailable"
# Performance evaluation
def print_evaluation_result(clf, bags_test, args):
    pred_score = np.array([clf(B.data()) for B in bags_test])
    pred_label = np.array([1 if score >= 0 else -1 for score in pred_score])
    true_label = np.array([B.y for B in bags_test])
    a = accuracy (pred_label, true_label)   # accuracy
    p = precision(pred_label, true_label)   # precision
    r = recall   (pred_label, true_label)   # recall
    f = f_score  (pred_label, true_label)   # F-score
    auc = metrics.roc_auc_score((true_label+1)/2, pred_score)

    if not args.aucplot:
        sys.stdout.write("""# accuracy,precision,recall,f-score,ROC-AUC
{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n""".format(a, p, r, f, auc))
        sys.stdout.flush()
    else:
        sys.stdout.write("""# accuracy,precision,recall,f-score,ROC-AUC
# {:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n""".format(a, p, r, f, auc))
        sys.stdout.flush()
        np.savetxt(sys.stdout.buffer, np.c_[pred_score, true_label])
s12_run_xgboost_only_train_create.py — project: KAGGLE_AVITO_2016, author: ZFTurbo
def run_train_with_model(train, features, model_path):
    start_time = time.time()
    gbm = xgb.Booster()
    gbm.load_model(model_path)
    print("Validating...")
    check = gbm.predict(xgb.DMatrix(train[features]))
    score = roc_auc_score(train['isDuplicate'].values, check)
    validation_df = pd.DataFrame({'itemID_1': train['itemID_1'].values, 'itemID_2': train['itemID_2'].values,
                                  'isDuplicate': train['isDuplicate'].values, 'probability': check})
    print('AUC score value: {:.6f}'.format(score))
    imp = get_importance(gbm, features)
    print('Importance array: ', imp)
    print('Prediction time: {} minutes'.format(round((time.time() - start_time) / 60, 2)))
    return validation_df, score
def classification():
    # Generate a random binary classification problem.
    X, y = make_classification(n_samples=350, n_features=15, n_informative=10,
                               random_state=1111, n_classes=2,
                               class_sep=1., n_redundant=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15,
                                                        random_state=1111)
    model = GradientBoostingClassifier(n_estimators=50, max_depth=4,
                                       max_features=8, learning_rate=0.1)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(predictions)
    print(predictions.min())
    print(predictions.max())
    print('classification, roc auc score: %s'
          % roc_auc_score(y_test, predictions))
def test_mlp():
    y_train_onehot = one_hot(y_train)
    y_test_onehot = one_hot(y_test)
    model = NeuralNet(
        layers=[
            Dense(256, Parameters(init='uniform', regularizers={'W': L2(0.05)})),
            Activation('relu'),
            Dropout(0.5),
            Dense(128, Parameters(init='normal', constraints={'W': MaxNorm()})),
            Activation('relu'),
            Dense(2),
            Activation('softmax'),
        ],
        loss='categorical_crossentropy',
        optimizer=Adadelta(),
        metric='accuracy',
        batch_size=64,
        max_epochs=25,
    )
    model.fit(X_train, y_train_onehot)
    predictions = model.predict(X_test)
    assert roc_auc_score(y_test_onehot[:, 0], predictions[:, 0]) >= 0.95
def train_classifier(x_train, y_train, x_cv, y_cv):
    clf = RandomForestClassifier(n_estimators=100)
    print 'starting fit'
    # excluding the patient_id column from the fit and prediction (patient_id?)
    clf.fit(x_train[::5], y_train[::5])
    print 'starting pred'
    y_pred = np.zeros(x_cv.shape[0])
    for i in xrange(4):
        y_pred[i::4] = clf.predict_proba(x_cv[i::4])[:, 1]
    if y_cv is not None:
        print roc_auc_score(y_cv, y_pred)
    return y_pred, clf