def newsgroups(*, path=None, key=None, limit=None):
    """
    Return the training-split messages of the 20 newsgroups dataset.

    The dataset is obtained through scikit-learn's ``fetch_20newsgroups``
    using ``subset='train'``.

    Arguments:
    - path(str): Unused in this case. Dataset is managed by sklearn.
    - key(str): Unused.
    - limit(int): Maximum number of messages to return, applied as a
      slice (``data[:limit]``). ``None`` (the default) returns every
      message; ``0`` returns an empty result.

    Returns:
    - The ``data`` attribute of the fetched 'train' subset, truncated
      to ``limit`` entries when a limit is given.
    """
    # This is going to download the dataset the first time we
    # run this function. Ideally we can populate these datasets
    # ahead of time.
    from sklearn.datasets import fetch_20newsgroups

    messages = fetch_20newsgroups(subset='train').data
    # Compare against None rather than relying on truthiness, so that an
    # explicit limit of 0 yields no messages instead of the full dataset.
    if limit is not None:
        return messages[:limit]
    return messages
# Comment list
# Table of contents