def __init__(self, tokenizer_type="PTBTokenizer"):
    """Record which tokenizer to use and ensure NLTK's Punkt data is available.

    Args:
        tokenizer_type: One of 'SpaceTokenizer', 'NLTKWhiteSpaceTokenizer'
            or 'PTBTokenizer'. Any other value prints a notice and is
            replaced by 'PTBTokenizer'.
    """
    # Sanity checks
    if tokenizer_type in ('SpaceTokenizer', 'NLTKWhiteSpaceTokenizer', 'PTBTokenizer'):
        self.tokenizer_type = tokenizer_type
    else:
        print("Unrecognized tokenizer type : setting back to default (PTBTokenizer)")
        self.tokenizer_type = "PTBTokenizer"

    # Look the resource up by its '<category>/<name>' id: nltk.download('punkt')
    # installs the package under 'tokenizers/', so this lookup succeeds once
    # the data is present and the download only runs when it is really missing.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')
评论列表
文章目录