def setup_extractor(self):
    """Prepare the sentence splitter, the chunk parser and the lemma lookup.

    Reads ``self.language``, ``self.grammars`` and ``self.lemma_to_token``.
    On success it sets ``self.splitter`` and ``self.parser`` and replaces,
    in place, every value of ``self.lemma_to_token`` with a set of its
    lowercased match tokens.

    Raises:
        ValueError: if ``self.grammars`` has no non-empty grammar for
            ``self.language``.
    """
    grammar = self.grammars.get(self.language)
    if not grammar:
        # Validate first so an unsupported language is reported with this
        # message before any other component is constructed.
        raise ValueError(
            "Invalid or unsupported language: '%s'. Please use one of the currently supported ones: %s" % (
                self.language, list(self.grammars.keys()))
        )

    self.splitter = PunktSentenceSplitter(self.language)
    # NOTE(review): RegexpParser is presumably NLTK's chunk parser - confirm
    # against the file's imports.
    self.parser = RegexpParser(grammar)

    # Iterate over a snapshot: the mapping's values are reassigned inside the
    # loop, and .items() (unlike .iteritems()) exists on Python 2 and 3.
    for lemma, match_tokens in list(self.lemma_to_token.items()):
        self.lemma_to_token[lemma] = set(match.lower() for match in match_tokens)
# Comment list
# Article table of contents