def train_test(X, Y, ratio, num_of_exp=20):
    """Estimate an averaged ROC curve and AUC over repeated random splits.

    For each of ``num_of_exp`` rounds the data is split with
    ``train_test_split`` (``ratio`` is passed as ``test_size``), a
    12-tree ``RandomForestClassifier`` is fitted on the training part, and
    the ROC curve of its scores on the test part is interpolated onto a
    fixed grid of 30 false-positive rates, ``np.linspace(0., 1., 30)``.

    Args:
        X: Feature data, forwarded to ``train_test_split``.
        Y: Labels, forwarded to ``train_test_split``.
            NOTE(review): column 1 of ``predict_proba`` is used as the
            score, so this presumably expects binary labels with the
            positive class second -- confirm against callers.
        ratio: Value handed to ``train_test_split`` as ``test_size``.
        num_of_exp: Number of split/fit/score rounds to average over.
            Defaults to 20, the value that used to be hard-coded.

    Returns:
        A tuple ``(mean_tpr, auc, auc_std)``: the true-positive rates
        averaged over all rounds on the 30-point grid (first point forced
        to 0.0, last point forced to 1.0), the mean of the per-round
        ``roc_auc_score`` values, and their standard deviation.

    Raises:
        ValueError: If ``num_of_exp`` is smaller than 1.
    """
    if num_of_exp < 1:
        raise ValueError("num_of_exp must be at least 1")

    # Alternative model left by the original author, kept for reference:
    #   estimators = build_model_mlp(); clf = Pipeline(estimators)
    clf = RandomForestClassifier(n_jobs=-1, n_estimators=12)

    mean_fpr = np.linspace(0., 1., 30)
    # Start from an array so the accumulator has the grid's shape from the
    # first round on.
    mean_tpr = np.zeros_like(mean_fpr)
    auc_all = []

    for i in range(1, num_of_exp + 1):
        # Single parenthesised argument: prints the same on Python 2 and 3.
        print("?%d???,%d/%d." % (i, i, num_of_exp))
        x_train, x_test, y_train, y_test = train_test_split(
            X, Y,
            test_size=ratio,
            # A fresh seed per round so every round sees a different split.
            random_state=np.random.randint(1, 100))
        clf.fit(x_train, y_train)
        y_pred = clf.predict_proba(x_test)[:, 1]
        fpr, tpr, _ = metrics.roc_curve(y_test, y_pred)
        # Resample this round's curve onto the shared grid so that curves
        # with different numbers of thresholds can be summed.
        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        auc_all.append(metrics.roc_auc_score(y_test, y_pred))

    mean_tpr /= num_of_exp
    # Pin both ends of the averaged curve to (0, 0) and (1, 1).
    mean_tpr[0] = 0.0
    mean_tpr[-1] = 1.0

    auc_array = np.array(auc_all)
    auc = auc_array.mean()
    auc_std = auc_array.std()
    return mean_tpr, auc, auc_std
评论列表
文章目录