def get_topic_distributions(examples, vectorizer, lda_model):
    """
    Retrieve the topic distributions of a collection of documents.

    :param examples: a list of tokenised documents
    :param vectorizer: the CountVectorizer used for transforming the documents
    :param lda_model: the trained LDA model; must expose ``num_topics`` and
                      ``get_document_topics``
    :return: a dense float array of shape (num_examples, num_topics)
             containing the topic distribution of each example; topics the
             model does not report for a document are left at 0
    """
    vectorized_corpus = vectorizer.transform(examples)
    # documents_columns=False: each row of the matrix is treated as a document.
    gensim_corpus = gensim.matutils.Sparse2Corpus(vectorized_corpus,
                                                  documents_columns=False)

    # Pre-allocate the result and fill it by topic id instead of relying on
    # the order/length of the returned list: gensim may omit topics whose
    # probability falls below its internal floor even when
    # minimum_probability=0, which would otherwise yield ragged or misaligned
    # rows. This also gives a well-shaped (0, num_topics) result for an empty
    # corpus.
    num_docs = vectorized_corpus.shape[0]
    topic_representations = np.zeros((num_docs, lda_model.num_topics))
    for row, doc in enumerate(gensim_corpus):
        doc_topics = lda_model.get_document_topics(doc,
                                                   minimum_probability=0.)
        for topic_id, topic_prob in doc_topics:
            topic_representations[row, topic_id] = topic_prob
    return topic_representations
# PRE-TRAINED WORD EMBEDDINGS METHODS
# Comment list
# Table of contents