mainPEP.py 文件源码-python代码片段

mainPEP.py 文件源码

python

阅读 34 收藏 0 点赞 0 评论 0

def threshold_estimate(x,y):
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(x, y, test_size=0.1, random_state=0)
    weight = float(len(y_train[y_train == 0]))/float(len(y_train[y_train == 1]))
    w1 = np.array([1]*y_train.shape[0])
    w1[y_train==1]=weight
    print("samples: %d %d %f" % (x_train.shape[0], x_test.shape[0], weight))
    estimator = xgb.XGBClassifier(max_depth=10, learning_rate=0.1, n_estimators=1000, nthread=50)
    estimator.fit(x_train, y_train, sample_weight=w1)
    y_scores = estimator.predict_proba(x_test)[:,1]
    precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
    f1 = 2*precision[2:]*recall[2:]/(precision[2:]+recall[2:])
    m_idx = np.argmax(f1)
    m_thresh = thresholds[2+m_idx]
    print("%d %f %f" % (precision.shape[0], f1[m_idx], m_thresh))
    return m_thresh

# Estimate threshold for the classifier using inner-round cross validation