sentence_similarity.py 文件源码-python代码片段

sentence_similarity.py 文件源码

python

阅读 23 收藏 0 点赞 0 评论 0

项目：QuestionAnswerNLP 作者: debjyoti385 项目源码文件源码

def info_content(lookup_word):
    """
    Return the information content of lookup_word, a value derived from
    its add-one (Laplace) smoothed frequency in the Brown corpus.

    The first call that finds the module-level token total N equal to 0
    walks every sentence of brown.sents(), lower-cases each token, tallies
    it in the module-level brown_word_counter and adds it to N. Later
    calls reuse those tallies.

    lookup_word -- the word to score; it is lower-cased before lookup.

    Returns 1.0 - log(n + 1) / log(N + 1), where n is the tallied count of
    lookup_word (0 when it was never seen). Unseen words therefore score
    exactly 1.0 and more frequent words score lower.
    """
    global N
    if N == 0:
        # poor man's lazy evaluation: N doubles as the "already loaded" flag
        # Single-argument call form prints the same text on Python 2 and 3.
        print("I SHOULD BE PRINTED ONLY ONCE")
        for sent in brown.sents():
            for token in sent:
                token = token.lower()
                # dict.get replaces has_key(), which Python 3 removed.
                brown_word_counter[token] = brown_word_counter.get(token, 0) + 1
                N = N + 1
    lookup_word = lookup_word.lower()
    n = brown_word_counter.get(lookup_word, 0)
    # The +1 terms keep both logarithms defined when a count is zero.
    return 1.0 - (math.log(n + 1) / math.log(N + 1))