def countvectorizer(inputpath=None, text=None):
    """Fit a ``CountVectorizer(min_df=1)`` on a corpus and print the result.

    The corpus comes from ``text`` when it is given and non-empty; otherwise
    every regular file directly inside ``inputpath`` is read as one document.

    Args:
        inputpath: Directory whose files make up the corpus. Sub-directories
            are skipped, and files are read in sorted name order so the rows
            of the printed matrix are reproducible between runs.
        text: An iterable of document strings, or a single string, which is
            treated as a one-document corpus. Takes precedence over
            ``inputpath``.

    Returns:
        None. The array form of the fitted matrix and the list of feature
        names are printed to standard output.

    Raises:
        ValueError: If neither ``inputpath`` nor ``text`` supplies a corpus.
    """
    # Resolve the corpus before building the vectorizer so that a call with
    # no usable input fails immediately with a clear message.
    if text:
        # fit_transform expects an iterable of documents, not a bare string.
        corpus = [text] if isinstance(text, str) else text
    elif inputpath:
        corpus = []
        for name in sorted(os.listdir(inputpath)):
            path = os.path.join(inputpath, name)
            if not os.path.isfile(path):
                # os.listdir also returns directories, which cannot be read.
                continue
            # No explicit encoding: files are decoded with the platform
            # default, as before.
            with open(path, 'r') as handle:
                corpus.append(handle.read())
    else:
        raise ValueError(
            "countvectorizer() needs a non-empty 'inputpath' or 'text'"
        )

    vectorizer = CountVectorizer(min_df=1)
    matrix = vectorizer.fit_transform(corpus)
    print(matrix.toarray())

    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement and fall back only on older releases.
    if hasattr(vectorizer, "get_feature_names_out"):
        # tolist() keeps the printed form a plain list of str.
        feature_names = vectorizer.get_feature_names_out().tolist()
    else:
        feature_names = vectorizer.get_feature_names()
    print(feature_names)
feature_extraction.py 文件源码
python
阅读 28
收藏 0
点赞 0
评论 0
评论列表
文章目录