def parametered_single(x_train,y_train,x_test,y_test,thresh_opt):
    """Fit a class-weighted XGBoost classifier and evaluate it on a test set.

    Samples whose label equals 1 are weighted by the ratio
    ``count(y_train < 1) / count(y_train == 1)``; every other sample keeps
    weight 1. Test samples are predicted positive when the probability in
    column 1 of ``predict_proba`` is strictly greater than the threshold.

    Args:
        x_train: Training feature matrix; must expose ``shape[0]`` and
            ``shape[1]``.
        y_train: Training labels; must support element-wise comparison and
            expose ``shape[0]``. Labels ``== 1`` are treated as positives
            and labels ``< 1`` as negatives.
        x_test: Test feature matrix; must expose ``shape[0]`` and
            ``shape[1]``.
        y_test: Test labels, passed unchanged to ``score_function``.
        thresh_opt: When equal to 1, the decision threshold is taken from
            ``threshold_estimate(x_train, y_train)``; otherwise it is 0.5.

    Returns:
        A 4-tuple ``(metrics, pred, prob, features1)``:

        * ``metrics`` -- array ``(thresh, precision, recall, f1, mcc)``.
        * ``pred`` -- transposed stack of ``y_test`` and the thresholded
          predictions (one row per sample when ``y_test`` is 1-D).
        * ``prob`` -- raw output of ``clf.predict_proba(x_test)``.
        * ``features1`` -- rows of ``(feature index, importance)`` ordered
          by decreasing importance.

    Raises:
        ZeroDivisionError: If ``y_train`` contains no label equal to 1.
    """
    print("samples: %d %d %d %d" % (x_train.shape[0],x_train.shape[1],x_test.shape[0],x_test.shape[1]))

    thresh = 0.5
    # estimate the threshold
    if thresh_opt == 1:
        # NOTE(review): threshold_estimate is defined outside this block;
        # presumably it returns a scalar probability cut-off -- confirm.
        thresh = threshold_estimate(x_train, y_train)

    clf = xgb.XGBClassifier(max_depth=10, learning_rate=0.1, n_estimators=500, nthread=50)

    # Up-weight the positive class by the negative/positive count ratio.
    n_neg = float(np.sum(y_train < 1))
    n_pos = float(np.sum(y_train == 1))
    weight = n_neg / n_pos

    # The weight vector must be floating point: assigning a float into an
    # integer array truncates it (e.g. 2.7 -> 2, 0.4 -> 0), which would
    # distort or even zero out the positive-class weight.
    w1 = np.ones(y_train.shape[0], dtype="float64")
    w1[y_train == 1] = weight

    clf.fit(x_train, y_train, sample_weight=w1)

    prob = clf.predict_proba(x_test)
    yfit = (prob[:, 1] > thresh)

    # NOTE(review): score_function is defined outside this block; it is
    # expected to return exactly (precision, recall, f1, mcc).
    precision, recall, f1, mcc = score_function(y_test, yfit)
    metrics = np.array((thresh, precision, recall, f1, mcc))
    print(metrics)

    # Rank features from most to least important.
    importances = clf.feature_importances_
    indices1 = np.argsort(importances)[::-1]
    features1 = np.transpose(np.array((indices1, importances[indices1])))

    pred = np.transpose(np.array((y_test, yfit)))
    return metrics, pred, prob, features1
# Cross validation for PEP-Word
# Comment list
# Table of contents