def create_word_scores(posWords, negWords, posTag, negTag):
    """Score every word by its chi-square association with the two labels.

    Args:
        posWords: Iterable of word sequences (e.g. one list of tokens per
            document) belonging to the ``posTag`` label.
        negWords: Iterable of word sequences belonging to the ``negTag``
            label.
        posTag: Condition key under which words from ``posWords`` are counted.
        negTag: Condition key under which words from ``negWords`` are counted.

    Returns:
        dict mapping each distinct word to the sum of its chi-square score
        against ``posTag`` and its chi-square score against ``negTag``.
        Empty inputs yield an empty dict.
    """
    from itertools import chain

    # Imported locally, like the other NLTK names, so the function does not
    # depend on a module-level import being present.
    from nltk.metrics import BigramAssocMeasures
    from nltk.probability import ConditionalFreqDist, FreqDist

    word_fd = FreqDist()                  # overall frequency of each word
    cond_word_fd = ConditionalFreqDist()  # frequency of each word per label

    # Flatten each label's word sequences lazily and count every word both
    # globally and under its label.
    for tag, documents in ((posTag, posWords), (negTag, negWords)):
        tag_fd = cond_word_fd[tag]
        for word in chain.from_iterable(documents):
            word_fd[word] += 1
            tag_fd[word] += 1

    pos_fd = cond_word_fd[posTag]
    neg_fd = cond_word_fd[negTag]
    pos_word_count = pos_fd.N()
    neg_word_count = neg_fd.N()
    total_word_count = pos_word_count + neg_word_count

    chi_sq = BigramAssocMeasures.chi_sq
    word_scores = {}
    # ``items()`` instead of the Python-2-only ``iteritems()``: FreqDist is a
    # Counter subclass in NLTK 3 and has no ``iteritems`` on Python 3.
    for word, freq in word_fd.items():
        # chi_sq(n_ii, (n_ix, n_xi), n_xx): count of the word under the label,
        # (total count of the word, total words under the label), total words.
        pos_score = chi_sq(pos_fd[word], (freq, pos_word_count), total_word_count)
        neg_score = chi_sq(neg_fd[word], (freq, neg_word_count), total_word_count)
        word_scores[word] = pos_score + neg_score
    return word_scores
extractFeatures.py 文件源码
python
阅读 41
收藏 0
点赞 0
评论 0
评论列表
文章目录