def __init__(self, lines):
    """Build a sentiment-score lookup table from JSON word records.

    Each element of *lines* is a JSON document with at least the keys
    'word', 'pos', and 'neg'; optional 'word_ar' / 'word_ur' keys give
    Arabic and Urdu variants that share the same scores.

    Populates:
        self.lookup  -- dict mapping term -> (pos_score, neg_score)
        self.max_len -- longest term seen, measured in wordpunct tokens
    """
    self.lookup = {}
    self.max_len = 0
    ensure_package_path()
    # Imported lazily so the package path is configured first.
    from nltk.tokenize import wordpunct_tokenize as tokenize
    for line in lines:
        word_data = json.loads(line)
        # capture both positive and negative, choose one at scoring time
        pos_score, neg_score = word_data['pos'], word_data['neg']
        terms = [word_data['word']]
        # TODO: make the sentiment scorer configurable
        if 'word_ar' in word_data:
            terms.append(word_data['word_ar'])
        if 'word_ur' in word_data:
            terms.append(word_data['word_ur'])
        for term in terms:
            # If a score already exists for this term, keep the least
            # neutral (maximum) value on each axis. Single dict lookup
            # via .get instead of the membership-test-then-index pair.
            prev_pos, prev_neg = self.lookup.get(term, (0., 0.))
            self.lookup[term] = (max(pos_score, prev_pos),
                                 max(neg_score, prev_neg))
            # Track the maximum token length callers will need to scan.
            self.max_len = max(self.max_len, len(tokenize(term)))