def __init__(self, lang, tokenizer=None, load=True):
    """Initialise the per-language tokenizer, phrase model and dictionary.

    Args:
        lang: Language identifier. Stored on the instance and used as the
            sub-directory name under ``nlp_data`` where saved models live.
        tokenizer: Tokenizer to use. When omitted (or falsy), a new
            ``Tokenizer(lang)`` is created.
        load: When true, a previously saved phrase model / dictionary is
            loaded if its file exists; otherwise empty ones are created.
    """
    self.lang = lang
    # Any falsy value (not just None) falls back to a fresh Tokenizer.
    self.tokenizer = tokenizer if tokenizer else Tokenizer(lang)

    # Saved artefacts live under <nlp_data>/<lang>/.
    lang_dir = join(nlp_data, lang)
    dictionary_path = join(lang_dir, DICTIONARY_FNAME)
    phrases_path = join(lang_dir, PHRASES_FNAME)

    # Phrase model: reuse the saved one when allowed and present.
    self.phrases = (
        gmodels.Phrases.load(phrases_path)
        if load and exists(phrases_path)
        else gmodels.Phrases()
    )

    # Dictionary: same policy as the phrase model.
    self.dictionary = (
        corpora.Dictionary.load(dictionary_path)
        if load and exists(dictionary_path)
        else corpora.Dictionary()
    )
评论列表
文章目录