def createbigramvocabulary(reviewfile, vocabfile):
    """Append frequently occurring bigrams from ``reviewfile`` to ``vocabfile``.

    ``createvocabulary(reviewfile, vocabfile)`` is called first (presumably it
    writes the single-word vocabulary -- confirm against that helper). The
    review file is then read line by line, each line is tokenized and wrapped
    in a leading ``'*'`` and a trailing ``'$'`` token, and the adjacent token
    pairs are counted over the whole file. Every pair seen at least 3 times
    whose first token is neither ``"+"`` nor ``"-"`` is appended to
    ``vocabfile`` as ``"<first> <second>\\n"``.

    Args:
        reviewfile: Path of the text file to read, one review per line.
        vocabfile: Path of the vocabulary file that the bigrams are appended to.
    """
    createvocabulary(reviewfile, vocabfile)

    bigram_counts = Counter()
    # ``with`` guarantees the handle is released even if tokenization raises.
    with open(reviewfile, "r") as finput:
        for line in finput:
            # line[1:] drops the first character of the line
            # (presumably the '+'/'-' review label -- TODO confirm).
            tokenized_line = ['*']
            tokenized_line.extend(word_tokenize(line[1:]))
            tokenized_line.append('$')
            # Count per line instead of first collecting every bigram of the
            # corpus in one list; the resulting counts and first-seen order
            # are the same.
            bigram_counts.update(bigrams(tokenized_line))

    # Append mode: whatever createvocabulary() put in the file is kept.
    with open(vocabfile, "a") as foutput:
        for (first, second), count in bigram_counts.items():
            if first != "+" and first != "-" and count >= 3:
                foutput.write(first + " " + second + "\n")
createVocabulary.py 文件源码
python
阅读 29
收藏 0
点赞 0
评论 0
评论列表
文章目录