def corpus2bow(self, tokenized_corpus=default_documents):
    """Return ``(vocab, corpus_in_bow)`` for a tokenized corpus.

    Builds a ``corpora.Dictionary`` over the corpus and uses it to convert
    every document to its bag-of-words (BOW) form.

    Arguments:
    tokenized_corpus -- iterable of documents, each passed as-is to
        ``doc2bow`` (presumably a list of token strings -- confirm against
        callers). Iterated twice, so it must be re-iterable. Defaults to
        the module-level ``default_documents``.

    Return:
    vocab -- the dictionary's ``token2id`` mapping,
        e.g. {'human': 0, ... 'minors': 11}
    corpus_in_bow -- one ``doc2bow`` result per document,
        e.g. [[(0, 1), (1, 1), (2, 1)]...]
    """
    token_dictionary = corpora.Dictionary(tokenized_corpus)
    # Vocabulary: the token -> id mapping held by the dictionary.
    token_ids = token_dictionary.token2id
    # Convert each document to bag-of-words using the shared dictionary.
    bow_documents = list(map(token_dictionary.doc2bow, tokenized_corpus))
    return token_ids, bow_documents
corpus_processor.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录