def _get_revision_word_dist(self, page_title, revid):
    """Return the normalized word distribution of one page revision.

    Results are memoized per ``(page_title, revid)`` in
    ``self.ctitle_to_revids_to_word_dist``; a revision that was already
    processed is returned straight from that cache.

    On a cache miss the revision text is fetched with
    ``self._get_revision_text``, split with ``wordpunct_tokenize``,
    lowercased, and filtered against ``STOPWORDS`` and ``PUNCTUATION``.
    The remaining words are counted with ``StatsCounter`` and normalized.

    Args:
        page_title: Key of the page in the per-title cache.
        revid: Identifier of the revision to analyse.

    Returns:
        Whatever ``StatsCounter(words).normalize()`` produces for the
        filtered, lowercased words of the revision.
    """
    # NOTE(review): plain indexing assumes the per-title cache already has
    # an entry for page_title (or is a defaultdict-like mapping) -- confirm.
    revids_to_word_dist = self.ctitle_to_revids_to_word_dist[page_title]
    if revid in revids_to_word_dist:
        return revids_to_word_dist[revid]

    raw_text = self._get_revision_text(page_title, revid)
    # Lowercase each token exactly once, then filter on the lowered form.
    lowered = (token.lower() for token in wordpunct_tokenize(raw_text))
    words = [
        word
        for word in lowered
        if word not in STOPWORDS and word not in PUNCTUATION
    ]

    pdist = StatsCounter(words).normalize()
    revids_to_word_dist[revid] = pdist
    return pdist
评论列表
文章目录