def crossValidate(document_term_matrix, labels, classifier="SVM", nfold=2):
    """Evaluate one classifier with shuffled, stratified k-fold cross-validation.

    For every fold the chosen estimator is fitted on the training split and
    scored on the held-out split; the fold accuracy is printed as it is
    computed. After all folds, a learning curve is drawn through
    ``plot_learning_curve`` and saved to ``lc.png`` in the current working
    directory.

    Args:
        document_term_matrix: Feature matrix. It is indexed with the integer
            index arrays produced by ``StratifiedKFold.split``.
        labels: Target labels. Indexed the same way, and each selection must
            provide ``.tolist()`` (presumably a NumPy array -- confirm
            against callers).
        classifier: Which estimator to build:
            ``"NN"`` (MLPClassifier), ``"LR"`` (LogisticRegression),
            ``"RF"`` (RandomForestClassifier), ``"NB"`` (GaussianNB),
            ``"SVM"`` (LinearSVC) or ``"KNN"``. Note that ``"KNN"`` builds a
            ``NearestCentroid`` model, not a k-nearest-neighbours one.
        nfold: Number of folds passed to ``StratifiedKFold`` as ``n_splits``.

    Returns:
        A 6-tuple ``(y_true, y_pred, precision, recall, fscore, accuracy)``:
        two ``pandas.Series`` holding the concatenated held-out labels and
        predictions of all folds, followed by the mean over folds of the
        weighted precision, recall, F-score and of the accuracy.

    Raises:
        ValueError: If ``classifier`` is not one of the supported keys.
    """
    # Each entry is a zero-argument factory so that only the requested
    # estimator is instantiated.
    builders = {
        # (50,) is a single hidden layer of 50 units.
        "NN": lambda: MLPClassifier(hidden_layer_sizes=(50,), activation='relu',
                                    solver='sgd', alpha=1e-2, random_state=None),
        "LR": lambda: linear_model.LogisticRegression(C=1e3),
        "RF": lambda: RandomForestClassifier(),
        "NB": lambda: GaussianNB(),
        "SVM": lambda: LinearSVC(),
        "KNN": lambda: NearestCentroid(),
    }
    if classifier not in builders:
        # Fail early with a clear message instead of calling .fit on None.
        raise ValueError(
            "Unknown classifier {!r}; expected one of {}".format(
                classifier, sorted(builders)))
    clf = builders[classifier]()

    # No seed is passed, so the shuffled folds are not fixed between calls.
    skf = StratifiedKFold(n_splits=nfold, shuffle=True)

    # Per-fold metrics. a_score is local so that scores from earlier calls
    # can never contaminate the mean returned by this call.
    precision = []
    recall = []
    fscore = []
    a_score = []
    # Held-out labels and predictions, concatenated in fold order.
    y_test_total = []
    y_pred_total = []

    for train_index, test_index in skf.split(document_term_matrix, labels):
        X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_test_total.extend(y_test.tolist())

        model = clf.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_total.extend(y_pred.tolist())

        # The fourth value (support) is not used.
        p, r, f, _ = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        fold_accuracy = accuracy_score(y_test, y_pred)
        # Call form prints identically on Python 2 and Python 3.
        print(fold_accuracy)

        a_score.append(fold_accuracy)
        precision.append(p)
        recall.append(r)
        fscore.append(f)

    # Side effect: draws the learning curve and overwrites lc.png.
    plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels,
                        ylim=None, cv=skf, n_jobs=1,
                        train_sizes=np.linspace(.1, 1.0, 5))
    plt.savefig('lc.png')

    return (pd.Series(y_test_total), pd.Series(y_pred_total),
            np.mean(precision), np.mean(recall), np.mean(fscore),
            np.mean(a_score))
# Comment list
# Table of contents