python类roc_auc_score()的实例源码

rand_o.py 文件源码 项目:drugADR 作者: cosylabiiit 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    """Fit ``clf`` on the training split and collect test-set metrics.

    :param clf: estimator exposing ``fit``, ``predict`` and ``predict_proba``
    :param X_t_train: training features handed to ``clf.fit``
    :param y_train: training targets handed to ``clf.fit``
    :param X_t_test: test features to predict on
    :param y_test: ground-truth targets for the test features
    :return: dict with the keys 'F2 Score', 'ROC_AUC', 'P_AUPR',
        'Precision' and 'Recall'
    """
    clf.fit(X_t_train, y_train)

    # Predict once and reuse the outputs: every metric below is computed
    # from the same hard predictions / probabilities.
    y_pred = clf.predict(X_t_test)
    y_proba = clf.predict_proba(X_t_test)

    app = dict()
    app['F2 Score'] = fbeta_score(y_test, y_pred, beta=2, average='samples')
    app['ROC_AUC'] = roc_auc_score(y_test, y_proba)
    # NOTE: average precision is fed the hard predictions, not probabilities.
    app['P_AUPR'] = average_precision_score(y_test, y_pred)
    app['Precision'] = precision_score(y_test, y_pred, average='micro')
    app['Recall'] = recall_score(y_test, y_pred, average='micro')
    return app
grasp.py 文件源码 项目:EEG-Grasp-Kaggle 作者: esube 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def score(net, samples=4096):
    """Compute the area under the curve, ROC score from a trained net

    We take `samples` random samples and compute the ROC AUC
    score on those samples.

    :param net: trained net exposing ``batch_iterator_test.source`` and
        ``predict_proba``
    :param samples: number of samples requested from ``make_valid_indices``
    :return: ROC AUC over the flattened events and predictions, or 0 when
        ``roc_auc_score`` rejects the inputs with a ``ValueError``
    """
    source = net.batch_iterator_test.source
    test_indices = make_valid_indices(source, samples)
    predicted = net.predict_proba(test_indices)
    # Predictions whose last axis is not N_EVENTS wide are decoded first.
    if predicted.shape[-1] != N_EVENTS:
        predicted = decode(predicted)
    actual = source.events[test_indices]
    try:
        return roc_auc_score(actual.reshape(-1), predicted.reshape(-1))
    except ValueError:
        # roc_auc_score signals an undefined score (e.g. only one class in
        # the sampled labels) with ValueError; keep the best-effort 0 for
        # that case only, so unrelated errors and interrupts still surface.
        return 0
p3.py 文件源码 项目:Uber-DS-Challenge 作者: bjherger 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def run_statsmodels_models(train, test, model_description):
    """
    Run logistic regression model to predict whether a signed up driver ever actually drove.
    :param train: Data frame used to fit the model
    :type train: pd.DataFrame
    :param test: Data frame used to compute the holdout AUC
    :type test: pd.DataFrame
    :param model_description: formula passed to ``dmatrices`` for both frames
    :type model_description: str
    :return: AUC for model generated
    :rtype: float
    """
    # Use dmatrices to format data
    # Lazy %-args: the logging module formats only if the record is emitted.
    logging.info('Running model w/ description: %s', model_description)
    logging.debug('Train df: \n%s', train.describe())
    logging.debug('Test df: \n%s', test.describe())
    y_train, X_train = dmatrices(model_description, data=train, return_type='dataframe', NA_action='drop')
    y_test, X_test = dmatrices(model_description, data=test, return_type='dataframe', NA_action='drop')

    # Create, fit model
    mod = sm.Logit(endog=y_train, exog=X_train)
    res = mod.fit(method='bfgs', maxiter=100)

    # Output model summary
    print(train['city_name'].value_counts())
    print(train['signup_channel'].value_counts())
    print(res.summary())

    # Create, output AUC
    predicted = res.predict(X_test)
    auc = roc_auc_score(y_true=y_test, y_score=predicted)
    print('AUC for 20%% holdout: %s' % auc)

    # Return AUC for model generated
    return auc



# Main section
xgb_train.py 文件源码 项目:jdata 作者: learn2Pro 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def modelfit(alg, dtrain, predictors, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    """Fit ``alg`` on ``dtrain`` and print a training-set report.

    :param alg: xgboost sklearn-style estimator
    :param dtrain: data frame holding the ``predictors`` columns and a
        ``'label'`` target column
    :param predictors: column names used as features
    :param useTrainCV: when truthy, run ``xgb.cv`` first and set
        ``n_estimators`` to the number of rows of the CV result
    :param cv_folds: number of folds passed to ``xgb.cv``
    :param early_stopping_rounds: early-stopping patience passed to ``xgb.cv``
    """
    features = dtrain[predictors]
    # Training and the report must use the same target column. The original
    # report read dtrain['Disbursed'] although the model is fit on 'label'.
    labels = dtrain['label']

    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(features.values, label=labels.values)
        cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'], nfold=cv_folds,
                          metrics='auc', early_stopping_rounds=early_stopping_rounds, show_progress=False)
        alg.set_params(n_estimators=cvresult.shape[0])

    # Fit the algorithm on the data
    alg.fit(features, labels, eval_metric='auc')

    # Predict training set:
    dtrain_predictions = alg.predict(features)
    dtrain_predprob = alg.predict_proba(features)[:, 1]

    # Print model report:
    print("\nModel Report")
    print("Accuracy : %.4g" % metrics.accuracy_score(labels.values, dtrain_predictions))
    print("AUC Score (Train): %f" % metrics.roc_auc_score(labels, dtrain_predprob))

    feat_imp = pd.Series(alg.booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind='bar', title='Feature Importances')
    plt.ylabel('Feature Importance Score')
retain.py 文件源码 项目:retain 作者: mp2893 项目源码 文件源码 阅读 19 收藏 0 点赞 0 评论 0
def calculate_auc(test_model, dataset, options):
    """Score ``dataset`` batch by batch and return the ROC AUC.

    ``dataset[0]`` holds the inputs, ``dataset[1]`` the labels and, when
    ``options['useTime']`` is truthy, ``dataset[2]`` the matching time data.
    Batches of ``options['batchSize']`` items are padded and fed to
    ``test_model``; the collected scores are compared against the labels.
    """
    batch_size = options['batchSize']
    use_time = options['useTime']

    n_batches = int(np.ceil(float(len(dataset[0])) / float(batch_size)))
    collected = []
    for batch_no in xrange(n_batches):
        window = slice(batch_no * batch_size, (batch_no + 1) * batch_size)
        batch_x = dataset[0][window]
        if not use_time:
            x, lengths = padMatrixWithoutTime(batch_x, options)
            batch_scores = test_model(x, lengths)
        else:
            batch_t = dataset[2][window]
            x, t, lengths = padMatrixWithTime(batch_x, batch_t, options)
            batch_scores = test_model(x, t, lengths)
        collected += list(batch_scores)

    return roc_auc_score(list(dataset[1]), list(collected))
test_mlp_classifier.py 文件源码 项目:muffnn 作者: civisanalytics 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_cross_val_predict():
    """Check the classifier works with ``cross_val_predict`` for multiclass.

    Iris targets are binarized, features standardized, and the out-of-sample
    probabilities must reach an AUC of at least 0.96 for every column.
    """
    features, labels = load_iris(return_X_y=True)
    targets = LabelBinarizer().fit_transform(labels)
    features = StandardScaler().fit_transform(features)

    estimator = MLPClassifier(
        n_epochs=10,
        solver_kwargs={'learning_rate': 0.05},
        random_state=4567,
    ).fit(features, targets)

    folds = KFold(n_splits=4, random_state=457, shuffle=True)
    out_of_sample = cross_val_predict(
        estimator, features, targets, cv=folds, method='predict_proba')
    per_class_auc = roc_auc_score(targets, out_of_sample, average=None)

    assert np.all(per_class_auc >= 0.96)
score_dataset.py 文件源码 项目:snape 作者: mbernico 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def score_binary_classification(y, y_hat, report=True):
    """
    Build a text report and the ROC AUC for binary predictions
    :param y: true value
    :param y_hat: class 1 probabilities
    :param report: when truthy, print the report string
    :return: tuple ``(score, report_string)`` with the AUC and the report text
    """
    # Threshold the probabilities at 0.5 for the classification report.
    predicted_classes = []
    for probability in y_hat:
        predicted_classes.append(1 if probability >= 0.5 else 0)

    pieces = ["---Binary Classification Score--- \n"]
    pieces.append(classification_report(y, predicted_classes))
    score = roc_auc_score(y, y_hat)
    pieces.append("\nAUC = " + str(score))
    report_string = "".join(pieces)

    if report:
        print(report_string)

    return score, report_string
convnet.py 文件源码 项目:dm-challenge 作者: ping133 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def find_optimal_C_for_AUC(xTrain, yTrain, xTest, yTest):
    """Grid-search the linear SVC penalty ``C`` by test-set AUC.

    One ``SVC`` is fit per value in ``10**-3 .. 10**2``; accuracy and ROC AUC
    on ``(xTest, yTest)`` are printed for each. The returned ``C`` has the
    highest AUC; ties on AUC are broken by the highest accuracy, and
    remaining ties by the smallest ``C``.

    :param xTrain: training features
    :param yTrain: training labels
    :param xTest: evaluation features
    :param yTest: evaluation labels
    :return: the selected value of ``C``
    """
    C_2d_range = [10.0 ** i for i in range(-3, 3)]

    accuracy = np.array([])
    auc_score = np.array([])

    for Ctry in C_2d_range:
        clf = SVC(C=Ctry, kernel="linear", probability=True)
        clf.fit(xTrain, yTrain)
        pred = clf.predict(xTest)
        pred_proba = clf.predict_proba(xTest)
        accuracy = np.append(accuracy, np.average(yTest == pred))
        auc_score = np.append(auc_score,
                              roc_auc_score(yTest, pred_proba[:, 1]))
        print("C: {}" .format(Ctry))
        print("accuracy: {}" .format(accuracy[-1]))
        print("AUC: {}" .format(auc_score[-1]))

    # Extract the optimal parameters to train the final model.
    best_auc_idx = np.where(auc_score == max(auc_score))[0]
    # Break AUC ties by accuracy, but only among the best-AUC candidates.
    # Matching the accuracy value against the whole grid could select a C
    # with a lower AUC that merely shares the same accuracy.
    best_idx = best_auc_idx[np.argmax(accuracy[best_auc_idx])]
    best_C = C_2d_range[best_idx]

    return best_C
base.py 文件源码 项目:stacking 作者: ikki407 项目源码 文件源码 阅读 20 收藏 0 点赞 0 评论 0
def eval_pred( y_true, y_pred, eval_type):
    if eval_type == 'logloss':#eval_type??????
        loss = ll( y_true, y_pred )
        print "logloss: ", loss
        return loss            

    elif eval_type == 'auc':
        loss = AUC( y_true, y_pred )
        print "AUC: ", loss
        return loss             

    elif eval_type == 'rmse':
        loss = np.sqrt(mean_squared_error(y_true, y_pred))
        print "rmse: ", loss
        return loss




######### BaseModel Class #########
nf1_classifier.py 文件源码 项目:nf1_inactivation 作者: greenelab 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def train_test_error(clf, train_x, train_y, test_x, test_y):
    """
    Fit the classifier and score it on both the training and testing data

    Arguments:
    :param clf: classifier sklearn object
    :param train_x: gene expression matrix
    :param train_y: list of labels
    :param test_x: gene expression matrix
    :param test_y: list of labels

    Output:
    Tuple (training auroc, testing auroc), each computed with
    average='weighted' from the output of ``predict``
    """
    fitted = clf.fit(train_x, train_y)
    # Score the training split first, then the testing split.
    train_err, test_err = [
        roc_auc_score(labels, fitted.predict(features), average='weighted')
        for features, labels in ((train_x, train_y), (test_x, test_y))
    ]
    return train_err, test_err
test_iforest.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 43 收藏 0 点赞 0 评论 0
def test_iforest_performance():
    """Test Isolation Forest performs well"""

    # Generate train/test data: the first 100 points are used for training.
    # (A previous assignment of np.r_[X + 2, X - 2] to X_train was
    # overwritten immediately and has been dropped.)
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    # Test set: the 20 held-out inliers (label 0) then 20 outliers (label 1).
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = clf.predict(X_test)

    # check that the scores separate inliers from outliers: ROC AUC > 0.98
    assert_greater(roc_auc_score(y_test, y_pred), 0.98)
test_ranking.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_score_scale_invariance():
    # Both roc_auc_score and average_precision_score must return the same
    # value when the probabilities are multiplied by 100 or shifted by -10.
    y_true, _, probas_pred = make_prediction(binary=True)

    for metric in (roc_auc_score, average_precision_score):
        baseline = metric(y_true, probas_pred)
        scaled = metric(y_true, 100 * probas_pred)
        shifted = metric(y_true, probas_pred - 10)
        assert_equal(baseline, scaled)
        assert_equal(baseline, shifted)
test_mlp.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def test_predict_proba_binary():
    # Test that predict_proba works as expected for binary class.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]

    clf = MLPClassifier(hidden_layer_sizes=5)
    clf.fit(X, y)
    y_proba = clf.predict_proba(X)
    y_log_proba = clf.predict_log_proba(X)

    # One row per sample, one column per class (two classes here).
    expected_shape = (y.shape[0], 2)
    assert_equal(y_proba.shape, expected_shape)

    # The log-probabilities must agree with the probabilities, both in the
    # winning class and in value.
    assert_array_equal(y_proba.argmax(axis=1), y_log_proba.argmax(axis=1))
    assert_array_equal(y_log_proba, np.log(y_proba))

    assert_equal(roc_auc_score(y, y_proba[:, 1]), 1.0)
fast_em.py 文件源码 项目:crayimage 作者: yandexdataschool 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def evaluate(self):
    """Time one prediction pass over ``self.hists`` and score it.

    Prints each flattened category next to its predicted value, the elapsed
    time and the AUC, then returns ``(acc, auc)`` where the accuracy uses a
    0.5 threshold on the predictions.
    """
    def _predict():
        return self.em.predict(self.hists).reshape(-1)

    t, result = timed(_predict, repeat=1)

    flat_cats = self.cats.reshape(-1)
    auc = roc_auc_score(flat_cats, result)
    acc = accuracy_score(self.cats, result > 0.5)

    for i in xrange(result.shape[0]):
        print('%d: %.2e' % (flat_cats[i], result[i]))

    print('Time %.2f millisec' % (t * 1000.0))
    print('AUC: %.3f' % auc)

    return acc, auc
trainable_em.py 文件源码 项目:crayimage 作者: yandexdataschool 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def evaluate(self):
    for scores in self.em.fit(self.X, self.cats, iterations=100, learning_rate=1.0):
      print np.mean(scores)

    for p in self.em.kernel.params:
      print p, p.get_value()
    auc = roc_auc_score(self.cats.reshape(-1), result)
    acc = accuracy_score(self.cats, result > 0.5)

    for i in xrange(result.shape[0]):
      print('%d: %.2e' % (self.cats.reshape(-1)[i], result[i]))

    print('Time %.2f millisec' % (t * 1000.0))
    print('AUC: %.3f' % auc)

    return acc, auc
em.py 文件源码 项目:crayimage 作者: yandexdataschool 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def b(self, digital_levels = 250):
    """Quantize ``self.X``, count the levels per row and score the EM output.

    Values are min-max scaled into ``digital_levels`` integer levels, the
    level counts are filled in by ``ndcount`` and passed to ``one_class_em``.
    Returns ``(acc, auc)``: the accuracy of the arg-max predictions and the
    ROC AUC of column 1 of the result, both against ``self.cats``.
    """
    lowest = np.min(self.X)
    highest = np.max(self.X)

    # Map [lowest, highest] onto the integer levels 0 .. digital_levels - 1.
    scaled = (self.X - lowest) / (highest - lowest) * (digital_levels - 1)
    digital = np.floor(scaled).astype('uint16')
    assert np.max(digital) < digital_levels

    # Output buffer handed to ndcount (allocated without initialisation).
    counts = np.ndarray(shape=(self.X.shape[0], digital_levels), dtype='uint8')
    ndcount(digital.T, counts)
    print('counts done')

    result = one_class_em(counts)

    auc = roc_auc_score(self.cats, result[:, 1])
    acc = accuracy(np.argmax(result, axis=1), self.cats)

    return acc, auc
callbacks.py 文件源码 项目:Sacred_Deep_Learning 作者: AAbercrombie0492 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def on_epoch_end(self, epoch, logs=None):
        """Compute validation metrics at the end of an epoch.

        Predicts on ``self.X_val``, takes the arg-max class per row and
        computes recall, precision, ROC AUC and F1 against ``self.y_val``
        (each with ``average=None`` followed by ``.mean()``). Every value is
        appended to the matching list attribute on ``self`` and stored in
        ``logs`` under the same name.

        :param epoch: epoch index (unused here)
        :param logs: dict updated in place with the metric values; a fresh
            dict is used when omitted
        """
        import numpy as np
        from sklearn.metrics import recall_score, precision_score, roc_auc_score, f1_score

        # A `{}` default would be shared between calls and mutated below.
        if logs is None:
            logs = {}

        y_pred = self.model.predict(self.X_val)
        y_pred = np.argmax(y_pred, axis=1)

        scorers = (
            ('recall', recall_score),
            ('precision', precision_score),
            ('auc', roc_auc_score),
            ('f1', f1_score),
        )
        for name, scorer in scorers:
            value = scorer(self.y_val, y_pred, average=None).mean()
            getattr(self, name).append(value)
            logs[name] = value
libscores.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def auc_metric(solution, prediction, task='binary.classification'):
    ''' Normalized Area under ROC curve (AUC).
    Return Gini index = 2*AUC-1 for  binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1)"solution" and any discriminant values
    for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).

    The `task` argument is accepted for interface compatibility and is not
    used by the computation.'''
    #auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect

    # Promote plain vectors to single-column matrices so the column loop
    # below handles them too (shape[1] does not exist for 1-D input).
    if np.ndim(solution) == 1:
        solution = np.reshape(solution, (-1, 1))
    if np.ndim(prediction) == 1:
        prediction = np.reshape(prediction, (-1, 1))

    label_num=solution.shape[1]
    auc=np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:,k])
        s_ = solution[:,k]
        if np.sum(s_)==0: print('WARNING: no positive class example in class {}'.format(k+1))
        npos = np.sum(s_==1)
        nneg = np.sum(s_<1)
        # Rank-sum form of the AUC: sum of the positives' ranks minus its
        # minimum possible value, divided by the number of pos/neg pairs.
        auc[k] = (np.sum(r_[s_==1]) - npos*(npos+1)/2) / (nneg*npos)
    return 2*mvmean(auc)-1


### END CLASSIFICATION METRICS 

# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks
libscores.py 文件源码 项目:AutoML5 作者: djajetic 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def auc_score_(solution, prediction):
    """Return ``mvmean`` of the unaveraged scikit-learn ROC AUC scores."""
    return mvmean(metrics.roc_auc_score(solution, prediction, average=None))

### SOME I/O functions
common_defs.py 文件源码 项目:hyperband 作者: zygmuntz 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def train_and_eval_sklearn_classifier( clf, data ):
    """Fit ``clf`` and report log loss, AUC and accuracy on both splits.

    ``data`` must provide 'x_train', 'y_train', 'x_test' and 'y_test'. One
    line of metrics is printed for the training split and one for the test
    split. The returned dict holds the test-split values, with 'loss' set to
    the log loss.
    """
    x_train, y_train = data['x_train'], data['y_train']
    x_test, y_test = data['x_test'], data['y_test']

    clf.fit( x_train, y_train )

    def _report( x, y, template ):
        # Print the three metrics for one split; hand back log loss and AUC.
        try:
            p = clf.predict_proba( x )[:,1]     # sklearn convention
        except IndexError:
            p = clf.predict_proba( x )

        split_ll = log_loss( y, p )
        split_auc = AUC( y, p )
        split_acc = accuracy( y, np.round( p ))
        print(template.format( split_ll, split_auc, split_acc ))
        return split_ll, split_auc

    _report( x_train, y_train,
             "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}" )
    ll, auc = _report( x_test, y_test,
                       "# testing  | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}" )

    # 'loss' reports the test log loss (1 - auc was a considered alternative).
    return { 'loss': ll, 'log_loss': ll, 'auc': auc }

###

# "clf", even though it's a regressor


问题


面经


文章

微信
公众号

扫码关注公众号