def info_content(lookup_word):
    """
    Return the information content of ``lookup_word``.

    Uses the Brown corpus available in NLTK to build a case-insensitive
    word frequency table, then scores the word as

        1.0 - log(n + 1) / log(N + 1)

    where ``n`` is the number of times the lower-cased word occurs in the
    corpus and ``N`` is the total number of word tokens counted. The
    ``+ 1`` terms are the Laplace smoothing: a word that never occurs
    gets ``n == 0`` and therefore a score of exactly 1.0.

    The table lives in the module-level ``brown_word_counter`` mapping
    and the token total in the module-level ``N``. Both are filled in on
    the first call that finds ``N == 0`` and reused afterwards.

    Args:
        lookup_word: the word to score; it is lower-cased before lookup.

    Returns:
        The information content as a float.

    Raises:
        ZeroDivisionError: if the corpus yields no words at all, because
            ``N`` stays 0 and ``log(N + 1)`` is then 0.
    """
    global N
    if N == 0:
        # Poor man's lazy evaluation: N doubles as the "table already
        # built" flag, so the corpus is only walked while N is still 0.
        print("I SHOULD BE PRINTED ONLY ONCE")
        for sent in brown.sents():
            for word in sent:
                word = word.lower()
                # dict.get replaces the Python-2-only dict.has_key check.
                brown_word_counter[word] = brown_word_counter.get(word, 0) + 1
                N += 1
    # Words absent from the table count as zero occurrences.
    n = brown_word_counter.get(lookup_word.lower(), 0)
    return 1.0 - (math.log(n + 1) / math.log(N + 1))
sentence_similarity.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录