def __init__(self, root, items, encoding='utf8'):
    """Initialise the tagged reader with a corpus-specific sentence splitter.

    :param root: Corpus root, forwarded unchanged to
        ``TaggedCorpusReader.__init__``.
    :param items: Corpus items to read, forwarded unchanged to
        ``TaggedCorpusReader.__init__``.
    :param encoding: Encoding forwarded to ``TaggedCorpusReader.__init__``
        (default ``'utf8'``).
    """
    # Separator ("gap") pattern with three alternatives:
    #   1. whitespace that directly follows a literal "/."
    #   2. any non-whitespace run ending in "_CODE", plus adjacent whitespace
    #   3. any non-whitespace run ending in "_ID", plus adjacent whitespace
    gaps_re = r'(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*'
    # gaps=True: the pattern describes what lies *between* sentences rather
    # than the sentences themselves, so matched text is discarded.
    sent_tokenizer = RegexpTokenizer(gaps_re, gaps=True)
    # Pass ``encoding`` through so a caller-supplied value actually reaches
    # the base reader instead of being silently dropped.
    TaggedCorpusReader.__init__(self, root, items, sep='_',
                                sent_tokenizer=sent_tokenizer,
                                encoding=encoding)
#: A list of all documents and their titles in ycoe.
ycoe.py 文件源码
python
阅读 21
收藏 0
点赞 0
评论 0
评论列表
文章目录