import nltk


def english_sentence_segment(text):
    """Segment text into sentences."""
    try:
        sent_detector = nltk.data.load(
            'tokenizers/punkt/english.pickle'
        )
        # Treat these abbreviations as non-terminal so they do not
        # trigger false sentence breaks (e.g. "et al." mid-sentence).
        extra_abbrev = ["e.g", "al", "i.e"]
        sent_detector._params.abbrev_types.update(extra_abbrev)
        return sent_detector.tokenize(text)
    except LookupError as e:
        raise LookupError(
            "NLTK tokenizers are missing. Download them with the following command: "
            '''python -c "import nltk; nltk.download('punkt')"'''
        ) from e
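
A minimal usage sketch, assuming NLTK is installed and the punkt model has already been downloaded; the sample sentence is illustrative only:

text = "Dr. Smith et al. proposed a new method. It works well, e.g. on noisy text."
for sentence in english_sentence_segment(text):
    print(sentence)
# The extra abbreviations registered above should keep "et al." and
# "e.g." from being mistaken for sentence boundaries, yielding two sentences.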