classification.py 文件源码

python
阅读 29 收藏 0 点赞 0 评论 0

项目:DocumentClassification 作者: bahmanh 项目源码 文件源码
def crossValidate(document_term_matrix,labels,classifier="SVM",nfold=2):
    clf = None
    precision = []
    recall = []
    fscore = []
    if classifier == "NN":
       clf = MLPClassifier(hidden_layer_sizes=(50), activation='relu', solver='sgd', alpha=1e-2, random_state=None)   
    elif classifier == "LR":
        clf = linear_model.LogisticRegression(C=1e3)
        #clf = tree.DecisionTreeClassifier()
    if classifier == "RF":
        clf = RandomForestClassifier()
    elif classifier == "NB":
        clf = GaussianNB()
    elif classifier == "SVM":
        clf = LinearSVC()
    elif classifier == "KNN":
        clf = NearestCentroid()

    skf = StratifiedKFold(n_splits=nfold, shuffle=True)
    y_test_total = []
    y_pred_total = []

    for train_index, test_index in skf.split(document_term_matrix, labels):
        X_train, X_test = document_term_matrix[train_index], document_term_matrix[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        y_test_total.extend(y_test.tolist())
        model = clf.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_total.extend(y_pred.tolist())
        p,r,f,s = precision_recall_fscore_support(y_test, y_pred, average='weighted')
        print accuracy_score(y_test, y_pred)
        a_score.append(accuracy_score(y_test, y_pred))
        precision.append(p)
        recall.append(r)
        fscore.append(f)

    plot_learning_curve(clf, "Learning Curves", document_term_matrix, labels, ylim=None, cv=skf, n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5))

    plt.savefig('lc.png')

    return pd.Series(y_test_total), pd.Series(y_pred_total), np.mean(precision),np.mean(recall),np.mean(fscore), np.mean(a_score)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号