paper.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:sharead 作者: strin 项目源码 文件源码
def rank_by_inverted_words(raw_query, filehashes=None):
    """Rank papers by summed per-word values for the tokens of a query.

    The query is split into tokens with NLTK's ``TweetTokenizer``. For each
    candidate file hash, every token is looked up in the key-value store
    scoped ``'paperwords:' + filehash`` and the returned values are summed
    (tokens missing from the store contribute ``0.``).

    Args:
        raw_query: The query string to tokenize.
        filehashes: Iterable of file hashes to score. When falsy (``None``
            or empty), every scope matching ``'paper:*'`` is fetched from
            the store and scored instead, which may be expensive.

    Returns:
        A list of file hashes sorted by descending score.

    NOTE(review): tokens are matched exactly as the tokenizer emits them
    (no case folding or stemming here) -- presumably the stored word keys
    were produced the same way; confirm against the indexing code.
    """
    # Imported lazily so that nltk is only needed when ranking is used.
    from nltk.tokenize import TweetTokenizer

    keywords = TweetTokenizer().tokenize(raw_query)

    if not filehashes:  # retrieve all from db. complexity warning.
        prefix_len = len('paper:')
        filehashes = [scope[prefix_len:]
                      for scope in KeyValueStore.scopes('paper:*')]

    score_by_filehash = {}
    for filehash in filehashes:
        word_dict = KeyValueStore('paperwords:' + filehash)
        # Start from 0. so a query with no tokens still yields a float score.
        score_by_filehash[filehash] = sum(
            (word_dict.get(word, default=0.) for word in keywords), 0.)

    # Single-argument parenthesized form prints identically on Python 2 and
    # is valid syntax on Python 3 (the bare print statement is not).
    print(score_by_filehash)
    return sorted(score_by_filehash, key=score_by_filehash.get, reverse=True)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号