def _ngram_indicator(ngrams, vocabulary):
    """Return a (1, len(vocabulary)) array flagging which vocabulary entries occur.

    Each n-gram in ``ngrams`` is an iterable of strings; it is joined with a
    single space and compared against the entries of ``vocabulary``. The
    result holds 1 where the entry is present and 0 otherwise, in vocabulary
    order.

    NOTE(review): membership is tested against a set, so vocabulary entries
    are assumed to be hashable (presumably space-joined strings) -- confirm.
    """
    # Build the lookup once; a set makes each membership test O(1) instead
    # of scanning the whole n-gram list for every vocabulary entry.
    present = {' '.join(gram) for gram in ngrams}
    flags = [1 if entry in present else 0 for entry in vocabulary]
    return numpy.array(flags).reshape(1, len(flags))


def naive_bayes(analysis):
    """Predict one label per feature family for an analysed text.

    The POS tags, tokens and ``deps_cc`` entries of every sentence in
    ``analysis["sentences"]`` are concatenated. Three binary indicator
    vectors are then built from 1-, 2- and 3-grams:

    * word n-grams of the normalised title, against ``common_grams``;
    * POS-tag n-grams, against ``common_pos_grams``;
    * syntactic n-grams of the dependencies, against ``common_sn_grams``.

    Args:
        analysis: mapping with a "sentences" sequence; each sentence provides
            'pos', 'tokens' and "deps_cc" sequences.

    Returns:
        A list ``[p1, p2, p3]`` holding the first element of the prediction
        returned by ``clf1``, ``clf2`` and ``clf3`` for the respective
        feature vector.
    """
    tags = []
    words = []
    deps_cc = []
    for sen in analysis["sentences"]:
        tags += sen['pos']
        words += sen['tokens']
        deps_cc += sen["deps_cc"]

    orders = (1, 2, 3)

    # Word n-grams over the normalised title (split once, reused per order).
    title_tokens = normalize_title(tags, words).split()
    word_grams = [g for n in orders for g in nltk.ngrams(title_tokens, n)]
    f1 = _ngram_indicator(word_grams, common_grams)

    # POS-tag n-grams.
    pos_grams = [g for n in orders for g in nltk.ngrams(tags, n)]
    f2 = _ngram_indicator(pos_grams, common_pos_grams)

    # Syntactic n-grams derived from the dependency entries.
    sn_grams = [g for n in orders for g in syntactic_n_gram(deps_cc, n)]
    f3 = _ngram_indicator(sn_grams, common_sn_grams)

    return [clf1.predict(f1)[0], clf2.predict(f2)[0], clf3.predict(f3)[0]]
# Comment list  (page navigation text left over from the web scrape; not code)
# Table of contents