def cross_predict(feat, f_name, X=X, y=y):
    """Evaluate a feature extractor with a LinearSVC via cross-validation.

    Builds a Pipeline(feat -> LinearSVC(C=0.02)), obtains out-of-fold
    predictions with 5-fold stratified CV, and prints the elapsed time,
    confusion matrix, accuracy, and a classification report, headed by
    *f_name*.

    NOTE(review): X and y default to the module-level X/y captured at
    definition time — confirm that is intentional.
    """
    # Parallel joblib workers are problematic on Windows, so run serially
    # there; elsewhere use all available cores.
    n_jobs = 1 if os.name == 'nt' else -1

    # Classifier. (A MultinomialNB(alpha=5) baseline was tried earlier.)
    classifier = LinearSVC(C=0.02)

    # Cross-validation strategy: StratifiedKFold is a KFold variation that
    # returns stratified folds, preserving the percentage of samples for
    # each class in every fold.
    #
    # StratifiedShuffleSplit cannot be used here: its randomized folds are
    # not a partition of the data, so cross_val_predict raises
    # "ValueError: cross_val_predict only works for partitions".
    folds = cross_validation.StratifiedKFold(y, n_folds=5, random_state=42)

    pipeline = Pipeline([('feat', feat), ('clf', classifier)])

    started = time()
    predictions = cross_validation.cross_val_predict(
        pipeline, X=X, y=y, n_jobs=n_jobs, cv=folds)
    elapsed = time() - started

    print("=" * 20, f_name, "=" * 20)
    print("time cost: {}".format(elapsed))
    print()
    print('confusion matrix:\n', confusion_matrix(y, predictions))
    print()
    print('\t\taccuracy: {}'.format(accuracy_score(y, predictions)))
    print()
    print("\t\tclassification report")
    print("-" * 52)
    print(classification_report(y, predictions))
# --- Section header (original Chinese comment lost to encoding) ---
# tfidf: baseline feature
# 评论列表 ("comment list") — page-scrape residue, not code
# 文章目录 ("article table of contents") — page-scrape residue, not code