def add_from_lemma_definitions(self, vocab, try_lower=False):
    """Copy the definitions of a word's lemmas onto the word itself.

    This covers the following scenario: suppose a dictionary was crawled,
    but only for word lemmas. Every word in ``vocab.words`` is lemmatized
    under each part-of-speech tag, and every definition stored in
    ``self._data`` for a resulting lemma that the word does not already
    have is appended to the word's own entry. ``self.save()`` is called
    once at the end.

    Args:
        vocab: object exposing an iterable ``words`` attribute.
        try_lower: when true, the lowercased form of each word is
            lemmatized as well, so a word can also pick up the definitions
            stored under its lowercase form or that form's lemmas.

    Errors raised while handling one form of a word are logged (with
    traceback) and the rest of that form is skipped; the pass itself
    continues. ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed.
    """
    lemmatizer = nltk.WordNetLemmatizer()
    # Part-of-speech tags handed to the lemmatizer, tried in this order.
    parts_of_speech = ('a', 's', 'r', 'n', 'v')
    added = 0
    for word in vocab.words:
        candidates = [word]
        if try_lower:
            lowered = word.lower()
            # An already-lowercase word would just repeat the same work.
            if lowered != word:
                candidates.append(lowered)

        for word_to_lemma in candidates:
            try:
                for part_of_speech in parts_of_speech:
                    lemma = lemmatizer.lemmatize(word_to_lemma, part_of_speech)
                    # .get() so that probing for a lemma never creates an
                    # entry for it.
                    lemma_defs = self._data.get(lemma)
                    if lemma == word or not lemma_defs:
                        continue
                    # NOTE(review): assumes self._data[word] yields a list
                    # even for unseen words (e.g. a defaultdict(list)) --
                    # confirm against the class constructor.
                    word_defs = self._data[word]
                    # This can be quite slow (linear membership test), but
                    # this code will not be used very often.
                    for def_ in lemma_defs:
                        if def_ not in word_defs:
                            word_defs.append(def_)
                            added += 1
            except Exception:
                # Best effort: log with traceback and move on to the next
                # form/word instead of aborting the whole pass.
                logger.exception("lemmatizer crashed on {}".format(word))
    logger.info("Added {} new defs in add_from_lemma_definitions".format(added))
    self.save()
# [web-page residue, not code] Comment list
# [web-page residue, not code] Table of contents