def cv_prediction(feature_dict, feature, polarity, threshold, folds):
    """Cross-validate a logistic-regression classifier over ``folds`` folds.

    For every fold a ``LogisticRegression`` model is fitted on the training
    samples vectorized with ``DictVectorizer``, then each held-out sample is
    converted with ``fit_feature`` and classified with ``prediction``.

    Args:
        feature_dict: Object appended to every training set as one extra
            sample with label 0; its ``len()`` is also the column count each
            query is checked against. Presumably the full feature
            vocabulary as a dict -- confirm against the caller.
        feature: Indexable collection of per-sample features.
        polarity: Indexable collection of labels, aligned with ``feature``.
        threshold: Passed unchanged to ``prediction``.
        folds: Number of folds handed to ``KFold``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``. Each value is the
        SUM of the per-fold scores, not the mean; no division by the number
        of folds happens here.
    """
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    dicvec = DictVectorizer()
    LR = LogisticRegression()
    kfold = KFold(len(polarity), n_folds=folds)

    for count, (train, test) in enumerate(kfold, start=1):
        # Training data for this fold, plus feature_dict as an extra sample
        # labelled 0 (presumably so the vectorizer sees every feature name).
        x = [feature[i] for i in train]
        y = [polarity[i] for i in train]
        x.append(feature_dict)
        y.append(0)
        LR.fit(dicvec.fit_transform(x), y)

        answer_label = [polarity[j] for j in test]
        test_label = []
        for j in test:
            query = fit_feature(feature[j], feature_dict)
            if query.shape[1] != len(feature_dict):
                # Column count does not match: mark the sample with -1
                # instead of asking the model for a prediction.
                # NOTE(review): -1 adds an extra label value; confirm the
                # metric functions in use accept it.
                result = -1
            else:
                result = prediction(LR, query, threshold)
            test_label.append(result)

        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))

    return accuracy, precision, recall, f1
# Scraped page navigation labels ("comment list", "table of contents"); not part of the code.