def get_words(tweets, *, min_word_len=7, top_n=100, min_count=5):
    """Return the most frequently-used long words across a set of tweets.

    Retweets (``t.is_rt``) are excluded. Each remaining tweet's text is run
    through ``handle_strip`` and tokenized with ``nltk.word_tokenize``; only
    tokens of at least *min_word_len* characters are kept, lowercased, and
    counted.

    Args:
        tweets: iterable of tweet objects exposing ``is_rt`` and
            ``tweet_text`` attributes (project type -- TODO confirm exact API).
        min_word_len: minimum token length to count (default 7, matching the
            original ``len(x) > 6`` filter).
        top_n: how many of the most common words to consider (default 100).
        min_count: minimum occurrence count to include a word (default 5,
            matching the original ``x[1] > 4`` filter).

    Returns:
        A list of ``[word, weight]`` pairs where ``weight = 6 + count // 3``
        (presumably a font-size scale for a tag cloud -- TODO confirm with
        the caller). Returned as a concrete list, not a lazy iterator: the
        original returned a Python 3 ``map`` object, which a caller could
        consume only once and could not ``len()`` or index.
    """
    originals = (t for t in tweets if not t.is_rt)
    # Flatten per-tweet token lists into one lowercased word stream.
    words = [
        token.lower()
        for t in originals
        for token in nltk.word_tokenize(handle_strip(t.tweet_text))
        if len(token) >= min_word_len
    ]
    fdist = nltk.FreqDist(words)
    return [
        [word, 6 + count // 3]
        for word, count in fdist.most_common(top_n)
        if count >= min_count
    ]
# NOTE(review): removed page-scrape artifacts ("comment list" / "article TOC"
# navigation text) that were not Python and broke the file's syntax.