def trainModel(xtrain, xtest, ytrain, ytest):
    """Fit each configured classifier, print its test log loss and save a report.

    For every estimator in the local ``classifiers`` list the function:

    1. fits it on ``(xtrain, ytrain)``;
    2. predicts labels and class probabilities for ``xtest``;
    3. prints the log loss of those probabilities against ``ytest``;
    4. passes ``ytest`` and the predicted labels to
       ``result.printMultiResult`` and hands whatever it returns to
       ``result.saveResult`` together with a fixed path and description.

    Args:
        xtrain: Training features, passed unchanged to ``clf.fit``.
        xtest: Held-out features, passed to ``clf.predict`` and
            ``clf.predict_proba``.
        ytrain: Training labels, passed unchanged to ``clf.fit``.
        ytest: Held-out labels used for the log loss and the saved report.

    Returns:
        None. Output is printed and written through ``result.saveResult``.
    """
    # Only the random forest is enabled. Any other estimator that exposes
    # ``predict_proba`` (e.g. KNeighborsClassifier, GradientBoostingClassifier,
    # GaussianNB) can be appended here to compare it under the same protocol.
    classifiers = [
        RandomForestClassifier(),
    ]

    # Loop-invariant report settings.
    # NOTE(review): every classifier is saved to the same path with the same
    # description; whether saveResult appends or overwrites is not visible here.
    save_path = "doc/result.txt"
    desc = "sentiment by tfidf "
    separator = "=" * 30

    for clf in classifiers:
        clf.fit(xtrain, ytrain)

        print(separator)
        print(clf.__class__.__name__)
        print('****Results****')

        test_predictions = clf.predict(xtest)
        test_probabilities = clf.predict_proba(xtest)

        # The probability columns are ordered like ``clf.classes_``. Passing
        # them explicitly keeps log_loss aligned even when ``ytest`` happens
        # to lack one of the classes seen during training.
        ll = log_loss(ytest, test_probabilities, labels=clf.classes_)
        print("Log Loss: {}".format(ll))

        result_str = result.printMultiResult(ytest, test_predictions)
        result.saveResult(save_path, desc, result_str)

    print(separator)
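# Comment list        (page-navigation text from the original web page; not code)
# Table of contents   (page-navigation text from the original web page; not code)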