def threshold_estimate(x, y):
    """Estimate the probability threshold that maximises F1 on a held-out split.

    The data is split 90/10 (fixed ``random_state=0``), an XGBoost classifier
    is fitted on the training part with the positive class re-weighted by the
    negative/positive ratio, and the precision-recall curve of the held-out
    part is scanned for the threshold with the highest F1 score.

    Args:
        x: Feature matrix accepted by ``train_test_split`` and ``XGBClassifier``.
        y: Binary labels (0 / 1) supporting boolean-mask comparison and
            ``.shape`` (e.g. a NumPy array).

    Returns:
        The threshold on the positive-class probability with the best F1.

    Raises:
        ZeroDivisionError: If the training split contains no positive sample.
    """
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        x, y, test_size=0.1, random_state=0)

    positive_mask = np.asarray(y_train == 1)
    n_positive = int(np.count_nonzero(positive_mask))
    n_negative = int(np.count_nonzero(np.asarray(y_train == 0)))
    weight = float(n_negative) / float(n_positive)

    # The weight vector must be float: an integer array would silently
    # truncate the ratio (and turn any ratio below 1 into a weight of 0).
    w1 = np.ones(y_train.shape[0], dtype=float)
    w1[positive_mask] = weight
    print("samples: %d %d %f" % (x_train.shape[0], x_test.shape[0], weight))

    estimator = xgb.XGBClassifier(
        max_depth=10, learning_rate=0.1, n_estimators=1000, nthread=50)
    estimator.fit(x_train, y_train, sample_weight=w1)
    y_scores = estimator.predict_proba(x_test)[:, 1]

    precision, recall, thresholds = precision_recall_curve(y_test, y_scores)

    # The first two curve points are ignored, as in the original code.
    # NOTE(review): the reason for skipping them is not visible here - confirm.
    # When the curve is too short for that, fall back to using every point
    # instead of failing on an empty selection.
    skip = 2 if len(thresholds) > 2 else 0

    # precision/recall carry one trailing point (precision 1, recall 0) that
    # has no matching threshold; drop it so F1 indices line up with
    # ``thresholds`` and can never point past its end.
    p = precision[skip:-1]
    r = recall[skip:-1]
    denom = p + r
    # Points where precision and recall are both 0 get F1 = 0 rather than NaN,
    # because np.argmax would otherwise select the NaN entry.
    f1 = np.divide(2 * p * r, denom,
                   out=np.zeros(denom.shape, dtype=float),
                   where=denom > 0)

    m_idx = np.argmax(f1)
    m_thresh = thresholds[skip + m_idx]
    print("%d %f %f" % (precision.shape[0], f1[m_idx], m_thresh))
    return m_thresh
# Estimate threshold for the classifier using inner-round cross validation
# Comment list
# Article table of contents