def stream_reuters_documents(reuters_dir):
    """Iterate over documents of the Reuters dataset.

    Every ``*.sgm`` file found directly inside `reuters_dir` is opened in
    binary mode and handed to ``ReutersParser.parse``; each document the
    parser produces is yielded in turn.

    This function does not download or uncompress anything itself: the
    directory must already contain the ``.sgm`` files. If no file matches,
    the generator simply yields nothing.

    Parameters
    ----------
    reuters_dir : str
        Directory that holds the Reuters ``*.sgm`` files.

    Yields
    ------
    doc
        Whatever ``ReutersParser.parse`` emits for each document --
        presumably a dict with 'body' (str), 'title' (str) and
        'topics' (list(str)) keys; confirm against ReutersParser.
    """
    parser = ReutersParser()
    for filename in glob(os.path.join(reuters_dir, "*.sgm")):
        # Keep the file open only while its documents are being consumed;
        # the context manager closes it once the parser is exhausted (or
        # the generator is closed early), instead of leaking the handle.
        with open(filename, 'rb') as sgm_file:
            for doc in parser.parse(sgm_file):
                yield doc
##################### main ######################
pos_tagging_data.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录