def get_user_to_word_proportion(user_to_text, word):
    """
    Map each user to the fraction of that user's tokens equal to ``word``.

    A ``LanuageModel`` is built from each user's text, and ``word`` is
    compared, unchanged, against the model's ``lowercase_tokens``.
    NOTE(review): presumably ``word`` should be passed in lowercase, since
    it is matched against lowercased tokens -- confirm with callers.

    Args:
        user_to_text: mapping from user to the text handed to
            ``LanuageModel``. Each key must support ``.encode('utf-8')``
            because it is echoed in the progress message.
        word: the token whose share of each user's tokens is measured.

    Returns:
        A dict mapping every user in ``user_to_text`` to a float: the number
        of tokens equal to ``word`` divided by the user's total token count,
        or 0.0 when the user has no tokens at all.

    Side effects:
        Prints one 'Finished user ...' progress line per user.
    """
    user_to_word_proportion = {}
    for user, text in user_to_text.items():
        tokens = LanuageModel(text).lowercase_tokens
        n_tokens = len(tokens)
        if n_tokens > 0:
            # Only one word's count is needed, so count it directly instead
            # of building a frequency table over the whole vocabulary.
            n_matches = sum(1 for token in tokens if token == word)
            # float() keeps this a true division under Python 2 as well.
            user_to_word_proportion[user] = n_matches / float(n_tokens)
        else:
            # Guard against dividing by zero for users with no tokens.
            user_to_word_proportion[user] = 0.0
        # Single-argument parenthesised form: prints the same text under the
        # Python 2 print statement and is valid syntax on Python 3.
        print('Finished user {}'.format(user.encode('utf-8')))
    return user_to_word_proportion
language_model.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录