def train_classifier(x_train, y_train, x_cv, y_cv):
    """Fit a random forest on a subsample and predict scores for ``x_cv``.

    The forest (100 trees) is fitted on every 5th row of ``x_train`` /
    ``y_train``.  Predictions for ``x_cv`` are produced in 4 interleaved
    strided slices and written back into the matching positions, so the
    returned vector is in the same row order as ``x_cv``.

    Args:
        x_train: Training features; must support ``[::5]`` row slicing.
        y_train: Training labels, row-aligned with ``x_train``.
        x_cv: Validation features; must expose ``.shape`` and support
            strided row slicing.
        y_cv: Validation labels, or ``None`` to skip the AUC report.

    Returns:
        A ``(y_pred, clf)`` tuple: ``y_pred`` is a 1-D float array holding
        column 1 of ``predict_proba`` for each row of ``x_cv`` (presumably
        the positive-class probability -- confirm against ``clf.classes_``),
        and ``clf`` is the fitted classifier.
    """
    clf = RandomForestClassifier(n_estimators=100)
    print('starting fit')
    # NOTE(review): an earlier comment here said the patient_id column is
    # excluded from fit and prediction, but nothing in this function drops a
    # column -- callers must strip it beforehand if that is required.
    # Only every 5th training row is used (presumably to shorten the fit).
    clf.fit(x_train[::5], y_train[::5])
    print('starting pred')
    y_pred = np.zeros(x_cv.shape[0])
    # Predict in interleaved slices rather than in one call (presumably to
    # bound peak memory); slice k covers rows k, k + 4, k + 8, ...
    n_chunks = 4
    for offset in range(n_chunks):
        chunk = x_cv[offset::n_chunks]
        # With fewer than n_chunks rows some slices are empty, and
        # predict_proba rejects zero-sample input -- skip those.
        if chunk.shape[0] == 0:
            continue
        y_pred[offset::n_chunks] = clf.predict_proba(chunk)[:, 1]
    if y_cv is not None:
        print(roc_auc_score(y_cv, y_pred))
    return y_pred, clf
评论列表
文章目录