def _tokenize_corpus_into_list_of_tokenized_sentences(cls, corpus):
    """Turn a raw corpus into a list of per-sentence token lists.

    The corpus is first segmented into sentences with
    ``nltk.sent_tokenize``. Every sentence is then passed through
    ``cls._clean_sentence``, and each cleaned sentence is finally split
    with ``nltk.word_tokenize``.

    Args:
        corpus: The text to segment (handed directly to
            ``nltk.sent_tokenize``).

    Returns:
        A list with one entry per detected sentence, each entry being the
        ``nltk.word_tokenize`` result for the cleaned sentence.
    """
    # NOTE(review): takes ``cls`` — presumably decorated with @classmethod
    # outside the visible lines; confirm against the enclosing class.
    raw_sentences = nltk.sent_tokenize(corpus)
    # All sentences are cleaned before any word tokenization starts.
    cleaned_sentences = list(map(cls._clean_sentence, raw_sentences))
    return list(map(nltk.word_tokenize, cleaned_sentences))
评论列表
文章目录