def sents(self, fileids=None, categories=None):
    """
    Lazily yield every sentence found in the selected paragraphs.

    Paragraph extraction (including any markup parsing it may involve) is
    delegated to ``self.paras``; this method only splits each paragraph
    with ``self._sent_tokenizer.tokenize`` and yields the pieces in order.

    :param fileids: passed through unchanged to ``self.paras``
        (presumably selects which files to read; semantics are defined
        by ``paras``).
    :param categories: passed through unchanged to ``self.paras``
        (presumably selects files by category; semantics are defined by
        ``paras``).
    :return: a generator producing one item per sentence, exactly as
        returned by the tokenizer.
    """
    for paragraph in self.paras(fileids, categories):
        # The tokenizer is looked up per paragraph, so nothing is touched
        # on ``self`` beyond ``paras`` when there are no paragraphs.
        for sentence in self._sent_tokenizer.tokenize(paragraph):
            yield sentence
# Comment list
# Table of contents