def _save_word_frequencies(freq, path):
    """Pickle the frequency mapping ``freq`` to ``path``, overwriting it."""
    with open(path, 'wb') as cache_file:
        pickle.dump(freq, cache_file)


def build_word_frequency_distribution():
    """Return token frequencies over all reviews, backed by a pickle cache.

    If ``word_freq.pickle`` inside ``data_dir`` can be opened and unpickled,
    its contents are returned as-is.  Otherwise every review yielded by
    ``read_reviews()`` is tokenized with ``en.tokenizer`` and the occurrences
    of each token's ``orth_`` value are counted.  The running counts are
    checkpointed to the cache file every 10000 reviews and written once more
    after the last review, so the cache always holds the complete result.

    Returns:
        The object stored in the cache file, or a ``defaultdict(int)``
        mapping ``token.orth_`` to its occurrence count when rebuilt.
    """
    path = os.path.join(data_dir, 'word_freq.pickle')

    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    # file; only point data_dir at locations you trust.
    try:
        with open(path, 'rb') as cache_file:
            cached = pickle.load(cache_file)
    except (IOError, EOFError, pickle.UnpicklingError):
        # Missing cache, or a checkpoint truncated by an interrupted run:
        # fall through and rebuild from the reviews.
        pass
    else:
        print('frequency distribution loaded')
        return cached

    # NOTE(review): a checkpoint left behind by an interrupted run is still a
    # valid pickle and will be loaded above as if it were complete; delete
    # the cache file to force a full rebuild.
    print('building frequency distribution')
    freq = defaultdict(int)
    i = -1  # stays -1 when read_reviews() yields nothing
    for i, review in enumerate(read_reviews()):
        doc = en.tokenizer(review['text'])
        for token in doc:
            freq[token.orth_] += 1
        if i % 10000 == 0:
            # Periodic checkpoint so a long run leaves progress on disk.
            _save_word_frequencies(freq, path)
            print('dump at {}'.format(i))

    # Persist the reviews counted since the last checkpoint; without this
    # the cached file would miss up to 9999 trailing reviews.  Skipped when
    # nothing was read or the last review was itself just checkpointed.
    if i >= 0 and i % 10000 != 0:
        _save_word_frequencies(freq, path)
    return freq
yelp_prepare.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录