def cut_low_freq(self, corpus, threshold=1):
    """Drop rare terms from the vocabulary and re-index ``corpus``.

    A term is kept only when its entry in ``self.docfreq`` is strictly
    greater than ``threshold``. Surviving terms receive new consecutive
    ids (0, 1, 2, ...) in their original order, and ``self.vocas``,
    ``self.docfreq`` and ``self.vocas_id`` are replaced accordingly.

    Args:
        corpus: Iterable of documents; each one is handed to
            ``self.conv`` together with the old-id -> new-id mapping.
        threshold: Terms with a frequency less than or equal to this
            value are removed.

    Returns:
        ``np.array`` built from the converted documents. When the
        converted documents do not all have the same length, a 1-D
        object array holding one converted document per element is
        returned instead.
    """
    kept_terms = []
    kept_freqs = []
    term_to_new_id = {}
    conv_map = {}  # old term id -> new term id, surviving terms only
    for old_id, term in enumerate(self.vocas):
        freq = self.docfreq[old_id]
        if freq > threshold:
            new_id = len(kept_terms)
            term_to_new_id[term] = new_id
            kept_terms.append(term)
            kept_freqs.append(freq)
            conv_map[old_id] = new_id

    # Publish the pruned vocabulary before converting the corpus, as the
    # original did, in case self.conv consults these attributes.
    self.vocas_id = term_to_new_id
    self.vocas = kept_terms
    self.docfreq = kept_freqs

    # NOTE(review): self.conv is defined outside this block; presumably it
    # rewrites a document's term ids through conv_map -- confirm.
    converted = [self.conv(doc, conv_map) for doc in corpus]
    try:
        return np.array(converted)
    except ValueError:
        # NumPy >= 1.24 refuses to build an array from rows of differing
        # length. Fall back to the 1-D object array that older NumPy
        # versions produced implicitly.
        ragged = np.empty(len(converted), dtype=object)
        for pos, doc in enumerate(converted):
            ragged[pos] = doc
        return ragged
评论列表
文章目录