dataset.py 文件源码-python代码片段

dataset.py 文件源码

python

阅读 39 收藏 0 点赞 0 评论 0

项目：LinguisticAnalysis 作者: DucAnhPhi 项目源码文件源码

def _preprocess_nonempty(tweets):
    """Preprocess each tweet exactly once and drop empty results."""
    # A generator feeds the filter so ut.preprocess runs a single time per
    # tweet instead of once for the test and once more for the value.
    cleaned = (ut.preprocess(tweet) for tweet in tweets)
    # len() is kept as the emptiness test because the return type of
    # ut.preprocess is not visible from this function.
    return [text for text in cleaned if len(text)]


def _keyword_set(preprocessed_tweets):
    """Return the distinct keywords reported for the preprocessed tweets."""
    ranked = la.get_most_frequent_keywords(preprocessed_tweets)
    # Each entry is indexed with [0] to pull out the keyword; a new set is
    # built rather than overwriting the list owned by the helper.
    return {entry[0] for entry in ranked}


def get_combined_keywords(tweets1, tweets2):
    """Build a zero-initialised counter over the keywords of two tweet sets.

    Both collections are preprocessed with ``ut.preprocess`` (tweets whose
    preprocessed form is empty are discarded), the most frequent keywords
    of each are obtained from ``la.get_most_frequent_keywords``, and the
    two keyword sets are merged.

    NOTE(review): the original comment says the helper yields the "top 25"
    keywords as tuples; that limit is decided inside
    ``la.get_most_frequent_keywords`` - confirm there.

    Args:
        tweets1: Iterable of raw tweets for the first source.
        tweets2: Iterable of raw tweets for the second source.

    Returns:
        A dict mapping every keyword found for either source to ``0``,
        ready to be used for counting keyword occurrences.
    """
    # Preprocess both inputs first, then query keywords, in the same
    # order as before.
    preprocessed1 = _preprocess_nonempty(tweets1)
    preprocessed2 = _preprocess_nonempty(tweets2)

    keywords1 = _keyword_set(preprocessed1)
    keywords2 = _keyword_set(preprocessed2)

    # Union removes keywords shared by both sources.
    return dict.fromkeys(keywords1 | keywords2, 0)