dataset.py 文件源码

python
阅读 27 收藏 0 点赞 0 评论 0

项目:LinguisticAnalysis 作者: DucAnhPhi 项目源码 文件源码
def extract_features(tweet, combinedKeywords, pronDict):
    """Assemble the feature list for a single tweet.

    The tweet is first passed through ``ut.preprocess``. When the
    preprocessed result is empty, or ``ut.is_retweet(tweet)`` is true,
    an empty list is returned and no features are computed.

    Otherwise the returned list contains, in this order:

    1. ``la.get_sentence_length`` of the preprocessed tweet,
    2. ``la.get_exclamation_marks`` of the raw tweet,
    3. ``get_flesch_grade_level`` of the preprocessed tweet (using
       ``pronDict``),
    4. one entry per key of the mapping returned by
       ``get_keywords_count`` (called with ``combinedKeywords``), taken
       in sorted key order.
    """
    cleaned = ut.preprocess(tweet)

    # Guard clause: nothing to extract from empty input or from retweets.
    if len(cleaned) == 0 or ut.is_retweet(tweet):
        return []

    # Fixed leading features; evaluated in the same order as listed.
    leading = [
        la.get_sentence_length(cleaned),
        la.get_exclamation_marks(tweet),
        get_flesch_grade_level(cleaned, pronDict),
    ]

    counts = get_keywords_count(cleaned, combinedKeywords)

    # Sort the keys so the keyword columns line up identically on every call.
    ordered_keys = list(counts.keys())
    ordered_keys.sort()

    return leading + [counts.get(name) for name in ordered_keys]
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号