def load_data():
    """Build the shared vocabulary and return one sample per corpus file.

    Side effects:
        Rebinds the module-level globals ``words`` (list of distinct tokens
        kept after filtering) and ``N`` (the length of that list).

    Returns:
        A list with one ``(x, x.w)`` tuple per corpus file, where ``x`` is
        whatever ``volumize`` returns for that file's frequency distribution.
    """
    global N, words

    # One frequency distribution per file id in the corpus.
    distributions = [FreqDist(corpus.words(name)) for name in corpus.fileids()]

    # Gather every distinct token, skipping anything found in
    # ENGLISH_STOP_WORDS or in ``punctuation``.
    vocabulary = set()
    for distribution in distributions:
        for token in distribution.keys():
            if token in ENGLISH_STOP_WORDS or token in punctuation:
                continue
            vocabulary.add(token)

    # The globals are set before volumize() runs.
    # NOTE(review): volumize presumably reads ``words``/``N`` — confirm.
    words = list(vocabulary)
    N = len(words)

    samples = []
    for distribution in distributions:
        volume = volumize(distribution)
        samples.append((volume, volume.w))
    return samples
# (web-page residue, not code) Comment list
# (web-page residue, not code) Table of contents