def __init__(self, root, items, encoding='utf8'):
    """Initialise the reader on top of ``TaggedCorpusReader``.

    :param root: Passed through unchanged as the first positional
        argument of ``TaggedCorpusReader.__init__`` (the corpus root).
    :param items: Passed through unchanged as the second positional
        argument of ``TaggedCorpusReader.__init__`` (the files to read).
    :param encoding: Encoding forwarded to ``TaggedCorpusReader``.
        Previously this argument was accepted but silently dropped.
    """
    # With ``gaps=True`` the pattern describes the *separators* between
    # sentences rather than the sentences themselves.  A separator is:
    #   * whitespace that directly follows a "/." sequence, or
    #   * any token ending in "_CODE" (plus surrounding whitespace), or
    #   * any token ending in "_ID" (plus surrounding whitespace).
    gaps_re = r'(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*'
    sent_tokenizer = RegexpTokenizer(gaps_re, gaps=True)

    # ``sep='_'`` overrides the base reader's separator; the explicit
    # ``encoding`` keyword makes the caller's choice take effect.
    TaggedCorpusReader.__init__(self, root, items, sep='_',
                                sent_tokenizer=sent_tokenizer,
                                encoding=encoding)
#: A list of all documents and their titles in ycoe.
# Comment list
# Article table of contents