def compactify(self):
    """
    Assign new word ids to all words, removing gaps in the id series.

    This is done to make the ids more compact, e.g. after some tokens have
    been removed via :func:`filter_tokens` and there are gaps in the id series.
    After the call, the ids in `self.token2id` are exactly
    ``0 .. len(self.token2id) - 1``.

    The old ids are sorted before being renumbered, so the relative order of
    the ids is preserved (the token with the smallest old id gets id 0, and
    so on) and the result does not depend on dict iteration order.

    Side effects: `self.token2id` and `self.dfs` are replaced by new dicts
    keyed by the new ids, and `self.id2token` is reset to an empty dict.

    Raises `KeyError` if `self.dfs` contains an id that is not present among
    the values of `self.token2id`.
    """
    logger.debug("rebuilding dictionary, shrinking gaps")

    # build mapping from old id -> new id; sort the old ids so that the new
    # numbering is deterministic and keeps the original relative order
    idmap = dict(izip(sorted(itervalues(self.token2id)), xrange(len(self.token2id))))

    # reassign mappings to new ids
    self.token2id = dict((token, idmap[tokenid]) for token, tokenid in iteritems(self.token2id))
    # the reverse mapping still refers to the old ids, so drop it
    # NOTE(review): presumably it is rebuilt on demand elsewhere -- confirm
    self.id2token = {}
    self.dfs = dict((idmap[tokenid], freq) for tokenid, freq in iteritems(self.dfs))
评论列表
文章目录