knock78.py 文件源码-python代码片段

knock78.py 文件源码

python

阅读 24 收藏 0 点赞 0 评论 0

项目：100knock2016 作者: tmu-nlp 项目源码文件源码

def cv_prediction(feature_dict, feature, polarity, threshold, folds):
    """Run k-fold cross-validation and accumulate classification metrics.

    For every fold a ``LogisticRegression`` model is re-fitted on the
    vectorized training samples and then used to label each held-out
    sample; accuracy, precision, recall and F1 of the fold are added to
    running totals.

    Args:
        feature_dict: Appended to the training samples of every fold as one
            extra sample (with label ``0``), handed to ``fit_feature`` for
            each test sample, and whose ``len()`` is compared against the
            column count of the query.
        feature: Indexable collection of per-sample features; indexed with
            the train/test indices produced by ``KFold``.
        polarity: Indexable collection of per-sample labels, parallel to
            ``feature``; its length determines the number of samples split.
        threshold: Passed through unchanged to ``prediction``.
        folds: Number of folds, passed to ``KFold`` as ``n_folds``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)`` holding the *sums* of
        the per-fold scores. The values are not divided by the number of
        folds here.
    """
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    dicvec = DictVectorizer()
    LR = LogisticRegression()
    # NOTE(review): ``KFold(n, n_folds=...)`` being directly iterable looks
    # like the legacy scikit-learn cross_validation API -- confirm against
    # the file's imports before upgrading scikit-learn.
    kfold = KFold(len(polarity), n_folds=folds)
    for count, (train, test) in enumerate(kfold, start=1):
        x = [feature[i] for i in train]
        y = [polarity[i] for i in train]
        # One extra training row built from feature_dict itself, labelled 0.
        # Presumably this guarantees the vectorizer emits a column for every
        # known feature regardless of the fold -- verify with the author.
        x.append(feature_dict)
        y.append(0)
        LR.fit(dicvec.fit_transform(x), y)

        answer_label = [polarity[j] for j in test]
        test_label = []
        for j in test:
            query = fit_feature(feature[j], feature_dict)
            if query.shape[1] != len(feature_dict):
                # Query width does not match the feature dictionary size:
                # fall back to label -1 instead of calling the model.
                result = -1
            else:
                result = prediction(LR, query, threshold)
            test_label.append(result)

        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))

    return accuracy, precision, recall, f1