def get_frequency_table(titles, vocab):
    r"""Build a per-title token frequency (bag-of-words) table.

    Each title is lower-cased and split into maximal runs of word
    characters (``\w+``).  Every resulting token is looked up in ``vocab``
    and the corresponding cell of the table is incremented.

    Args:
        titles: Sized iterable of title strings.  Row ``i`` of the result
            describes the i-th title in iteration order.
        vocab: Mapping from token to an integer; the integer is used as the
            column index for that token.  The table has ``len(vocab)``
            columns.

    Returns:
        A C-ordered ``numpy.ndarray`` of shape ``(len(titles), len(vocab))``
        with dtype ``int``, where ``result[i, vocab[t]]`` is the number of
        times token ``t`` occurs in title ``i``.

    Raises:
        KeyError: If a title contains a token that is not a key of ``vocab``.
    """
    # Function-scope import keeps the block self-contained.
    import re

    # Same tokens as NLTK's RegexpTokenizer(r'\w+').tokenize: for a gap-less
    # pattern that tokenizer is simply findall() on the compiled regex.
    find_tokens = re.compile(r'\w+', re.UNICODE).findall

    # np.zeros is the documented way to get a zero-initialised array; it
    # replaces the low-level np.ndarray constructor followed by fill(0).
    freqtable = np.zeros((len(titles), len(vocab)), dtype=int)

    for row, title in enumerate(titles):
        for token in find_tokens(title.lower()):
            # Unknown tokens raise KeyError on purpose (see docstring).
            freqtable[row, vocab[token]] += 1
    return freqtable
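# Page-navigation residue from the source web page (not part of the code):
# "评论列表" (comment list), "文章目录" (table of contents)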