def test_bag_of_words_for_series():
    """Check that BagOfWordsTransformer turns a tokenized XSeries into an XDataFrame.

    The first 10 documents of the 20 newsgroups corpus (headers, footers and
    quotes removed) are wrapped in an XSeries, tokenized into lists of
    lower-cased, punctuation-free words, and then passed through
    BagOfWordsTransformer.

    NOTE(review): fetch_20newsgroups presumably needs the corpus to be cached
    locally or downloadable -- confirm this test is allowed network access.
    """
    dataset = fetch_20newsgroups(
        shuffle=True,
        random_state=1,  # fixed seed so the same 10 documents are picked each run
        remove=('headers', 'footers', 'quotes'),
    )
    series = XSeries(dataset.data[:10])
    assert series.data_type == str

    # Translation table that deletes every ASCII punctuation character.
    translator = str.maketrans('', '', string.punctuation)
    tokenizer_transformer = XSeriesTransformer(
        transform_function=lambda text: text.lower().translate(translator).strip().split()
    )
    tokenized_series = tokenizer_transformer.fit_transform(series)

    bag_transform = BagOfWordsTransformer()
    bag_of_words = bag_transform.fit_transform(tokenized_series)

    # isinstance instead of an exact type() comparison, so a subclass of
    # XDataFrame is accepted as well.
    assert isinstance(bag_of_words, XDataFrame)
test_bag_of_features.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录