def online2(X_org, y_org, test_x, test_uid, n_folds=5, shuffle=False):
    """Fit each base classifier per CV fold and save fold-averaged predictions.

    For every classifier in the fixed model list below, the model is refit on
    the training indices of each stratified fold, ``predict_proba`` is
    evaluated on ``test_x``, and column 1 of the result is averaged across
    folds.  The averaged vector is passed to ``save_submission`` together
    with ``test_uid`` and a timestamped file name under
    ``consts.SUBMISSION_PATH``.

    The held-out indices of each fold are not used, so no out-of-fold
    training predictions are produced.  The function returns ``None``.

    Args:
        X_org: Training feature matrix.  It is indexed with integer index
            arrays (assumes a NumPy array -- TODO confirm against callers).
        y_org: Training labels aligned with ``X_org``; must expose ``.size``
            when ``shuffle`` is true.
        test_x: Feature matrix to predict on; must expose ``.shape``.
        test_uid: Identifiers forwarded unchanged to ``save_submission``.
        n_folds: Number of folds handed to ``StratifiedKFold``.  Defaults to
            5, the value that used to be hard-coded.
        shuffle: When true, apply one random permutation to the rows of
            ``X_org`` / ``y_org`` before the folds are built.  Defaults to
            ``False``, the value that used to be hard-coded.
    """
    X, y = X_org, y_org
    if shuffle:
        idx = np.random.permutation(y.size)
        X, y = X[idx], y[idx]

    # NOTE(review): this is the legacy ``StratifiedKFold(y, n_folds)`` call
    # form, iterated directly as (train, test) index pairs.  Confirm which
    # scikit-learn module the file imports before changing it.
    skf = list(StratifiedKFold(y, n_folds))

    # Each model is default-constructed and then configured from
    # INITIAL_PARAMS; a missing key leaves the library defaults in place.
    clfs = [
        RandomForestClassifier().set_params(**INITIAL_PARAMS.get("RFC:one", {})),
        ExtraTreesClassifier().set_params(**INITIAL_PARAMS.get("ETC:one", {})),
        GradientBoostingClassifier().set_params(**INITIAL_PARAMS.get("GBC:one", {})),
        LogisticRegression().set_params(**INITIAL_PARAMS.get("LR:one", {})),
        # Disabled entries, kept for reference:
        # xgb.XGBClassifier().set_params(**INITIAL_PARAMS.get("XGBC:two", {})),
        # xgb.XGBClassifier().set_params(**INITIAL_PARAMS.get("XGBC:one", {})),
    ]

    # Single-argument print(...) calls emit the same text under Python 2
    # (print statement) and Python 3 (print function).
    print("Creating train and test sets for blending.")

    n_test_rows = test_x.shape[0]
    dataset_blend_test = np.zeros((n_test_rows, len(clfs)))

    for j, clf in enumerate(clfs):
        print("{0} {1}".format(j, clf))

        # One column of test-set probabilities per fold for this model.
        fold_predictions = np.zeros((n_test_rows, len(skf)))
        for i, (train, _) in enumerate(skf):
            print("Fold {0}".format(i))
            clf.fit(X[train], y[train])
            # Column 1 of predict_proba -- presumably the positive class of
            # a binary target; verify against the label encoding.
            fold_predictions[:, i] = clf.predict_proba(test_x)[:, 1]

        # Average over folds (axis 1) to get one prediction per test row.
        dataset_blend_test[:, j] = fold_predictions.mean(1)

        file_name = (clf.__class__.__name__ + '_'
                     + strftime("%m_%d_%H_%M_%S", localtime()) + '.csv')
        save_submission(os.path.join(consts.SUBMISSION_PATH, file_name),
                        test_uid, dataset_blend_test[:, j])
# [web-page residue] 评论列表 -- "Comment list"
# [web-page residue] 文章目录 -- "Table of contents"