def ngram_baseline(text):
    """Build a conditional frequency distribution from filtered bigrams.

    ``text`` is split into consecutive pairs with ``ngrams(text, 2)``. Each
    element of ``text`` is indexed as ``item[0]`` and ``item[1]``; presumably
    these are ``(token, label)`` pairs -- TODO confirm against the caller.

    A bigram ``(first, second)`` is kept only when ``second[1] == 1``. For
    every kept bigram the pair ``(first[0], second[0])`` is recorded, i.e.
    the first element's value is the condition and the second element's
    value is the sample counted under that condition.

    Args:
        text: Sequence of indexable items with at least two fields each.

    Returns:
        An ``nltk.ConditionalFreqDist`` built from the recorded
        ``(condition, sample)`` pairs.
    """
    # Only the second item's field 1 is inspected; the first item's field 1
    # is deliberately ignored, matching the original filter.
    pairs = [
        (first[0], second[0])
        for first, second in ngrams(text, 2)
        if second[1] == 1
    ]
    return nltk.ConditionalFreqDist(pairs)
classifier.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录