def get_datasets_20newsgroup(subset='train', categories=None, shuffle=True, random_state=42):
    """
    Load the 20 newsgroups dataset by delegating to ``fetch_20newsgroups``.

    Every argument is forwarded unchanged, by keyword, to ``fetch_20newsgroups``.

    :param subset: which portion to load: train, test or all
    :param categories: list of newsgroup names to load
    :param shuffle: whether to shuffle the dataset
    :param random_state: integer seed used when shuffling the dataset
    :return: whatever ``fetch_20newsgroups`` returns for these arguments
        (described by the original author as the data and labels of the
        newsgroup)
    """
    # Collect the options once so the delegated call stays a single short line.
    fetch_options = {
        'subset': subset,
        'categories': categories,
        'shuffle': shuffle,
        'random_state': random_state,
    }
    return fetch_20newsgroups(**fetch_options)
评论列表
文章目录