def prep_data(data, max_sents=2):
    """Split ``data['content']`` into sentences and attach the leading ones.

    The content is stripped of surrounding whitespace, tokenized with NLTK's
    English Punkt sentence tokenizer, and the first ``max_sents`` sentences
    are stored under ``data['sents']`` as a dict of the form
    ``{<uuid4 string>: {'text': <sentence>}}``.

    Args:
        data: Mutable mapping holding the text under the ``'content'`` key.
            It is modified in place.
        max_sents: How many leading sentences to keep. ``None`` keeps every
            sentence. Defaults to 2, the limit that used to be hard-coded.

    Returns:
        The same ``data`` object, now carrying the ``'sents'`` entry.

    Raises:
        KeyError: If ``data`` is a dict without a ``'content'`` key.
    """
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = sent_detector.tokenize(data['content'].strip())

    # A fresh random UUID per sentence gives each one a unique key, so the
    # keys differ between calls even for identical input.
    data['sents'] = {
        str(uuid.uuid4()): {'text': sentence}
        for sentence in sentences[:max_sents]
    }
    return data
评论列表
文章目录