def prune(self, min_freq=5, max_size=sys.maxsize):
    """Return a new Vocab restricted to the most frequent symbols of this one.

    Symbols are visited in order of decreasing frequency and copied, together
    with their frequency, into a fresh Vocab constructed with this vocab's
    ``unk`` and ``emb``.

    Args:
        min_freq: lowest frequency a symbol may have and still be kept.
        max_size: exclusive bound on the 1-based frequency rank of kept
            symbols; because the comparison is strict, at most
            ``max_size - 1`` symbols are copied by this method.
            NOTE(review): the strict bound presumably leaves a slot for the
            unk symbol -- confirm against Vocab.__init__.

    Returns:
        The newly built Vocab; ``freeze()`` is called on it when
        ``self.frozen`` is truthy.
    """
    pruned_vocab = Vocab(unk=self.unk, emb=self.emb)
    # sorted() is stable, so symbols with equal frequency keep the relative
    # order in which sym2freqs yields them.
    by_frequency = sorted(self.sym2freqs.items(), key=operator.itemgetter(1), reverse=True)
    for rank, (sym, freq) in enumerate(by_frequency, start=1):
        if not (freq >= min_freq and rank < max_size):
            # Frequencies only fall and ranks only rise from here on, so no
            # later symbol can pass the check: stop scanning.
            break
        # Calling the vocab with a symbol presumably registers it and assigns
        # an id -- confirm against Vocab.__call__.
        pruned_vocab(sym)
        pruned_vocab.sym2freqs[sym] = freq
    if self.frozen:
        # The original vocab was frozen, so freeze the pruned copy as well.
        pruned_vocab.freeze()
    return pruned_vocab
# Comment list
# Table of contents