# Module-level imports required by this method
import nltk
import spacy


def _set_tokenizer(self, tokenizer):
    """
    Set the tokenization function.
    :param tokenizer: name of the tokenization method, "nltk" or "spacy"
    :return: None
    """
    if tokenizer == "nltk":
        # NLTK's word_tokenize requires the "punkt" tokenizer data to be downloaded
        self.tokenizer = nltk.word_tokenize
    elif tokenizer == "spacy":
        # "en" is a spaCy 2.x shortcut link; spaCy 3+ uses a model name such as "en_core_web_sm"
        spacy_en = spacy.load("en")

        def spacy_tokenizer(seq):
            return [w.text for w in spacy_en(seq)]

        self.tokenizer = spacy_tokenizer
    else:
        raise ValueError("Invalid tokenizing method %s" % tokenizer)
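For context, here is a minimal usage sketch. The TextProcessor class below is hypothetical (the original method presumably belongs to a larger preprocessing class), and only the NLTK branch is exercised so the example runs without installing a spaCy model:

import nltk


class TextProcessor:
    """Hypothetical wrapper; _set_tokenizer above is assumed to be one of its methods."""

    def __init__(self, tokenizer="nltk"):
        self._set_tokenizer(tokenizer)

    def _set_tokenizer(self, tokenizer):
        # Only the NLTK branch is shown here; the spaCy branch additionally
        # needs an installed English model (e.g. en_core_web_sm).
        if tokenizer == "nltk":
            self.tokenizer = nltk.word_tokenize
        else:
            raise ValueError("Invalid tokenizing method %s" % tokenizer)


if __name__ == "__main__":
    # word_tokenize needs the "punkt" data (newer NLTK versions may also need "punkt_tab")
    nltk.download("punkt", quiet=True)
    proc = TextProcessor("nltk")
    print(proc.tokenizer("Machine reading comprehension is fun."))
    # expected: ['Machine', 'reading', 'comprehension', 'is', 'fun', '.']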