def build_vocabulary(lower=3, n=50000):
    """Load the cached word-to-id vocabulary, rebuilding and caching it on a miss.

    The pickle at ``vocab_fn`` is tried first. If it cannot be opened, the
    vocabulary is rebuilt from ``build_word_frequency_distribution()``: its
    items are ranked by descending count, the top ones receive consecutive
    ids starting at ``lower``, and the mapping is pickled to ``vocab_fn`` so
    the next call can load it directly.

    Args:
        lower: Id assigned to the highest-ranked word. Smaller ids are never
            assigned here (presumably reserved for special tokens --
            confirm against the callers).
        n: Largest id that may be assigned, so at most ``n - lower + 1``
            words are kept. If that count is not positive the vocabulary
            is empty.

    Returns:
        dict mapping each kept word to its id.
    """
    try:
        with open(vocab_fn, 'rb') as vocab_file:
            # NOTE(review): unpickling can execute arbitrary code; vocab_fn
            # must only ever point at a file this program wrote itself.
            vocab = pickle.load(vocab_file)
    except IOError:
        # No readable cache: fall through and build from scratch.
        print('building vocabulary')
    else:
        print('vocabulary loaded')
        return vocab

    word_counts = build_word_frequency_distribution()
    # Clamp at zero: a negative slice bound would drop words from the tail
    # of the ranking instead of producing an empty vocabulary.
    keep = max(0, n - lower + 1)
    # Negated count gives descending order; the sort is stable, so ties keep
    # the order in which the distribution yields them.
    ranked = sorted(word_counts.items(), key=lambda item: -item[1])[:keep]
    vocab = {word: lower + rank for rank, (word, _count) in enumerate(ranked)}

    with open(vocab_fn, 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)
    return vocab
yelp_prepare.py 文件源码
python
阅读 17
收藏 0
点赞 0
评论 0
评论列表
文章目录