def fit(self, X, y):
    """
    Fit the model to a corpus and its per-document responses.

    The sampler's outputs are stored on the instance: topic
    distributions per document (``theta``), term distributions per
    topic (``phi``), regression coefficients (``eta``) and the
    sampler's ``loglikelihoods`` output.

    Parameters
    ----------
    X : array-like, shape = (n_docs, n_terms)
        The document-term matrix. It must expose ``shape`` (unpacking
        to exactly two values) and ``sum()``.
    y : array-like, shape = (n_docs,)
        Response values for each document. Converted to a contiguous
        float64 array before being handed to the sampler.

    Returns
    -------
    None
        All results are stored as attributes on ``self``.
    """
    # Keep a reference to the corpus and record its dimensions.
    self.doc_term_matrix = X
    n_docs, n_terms = X.shape
    self.n_docs = n_docs
    self.n_terms = n_terms

    # Sum of every entry in X; forwarded to the sampler as the token count.
    self.n_tokens = X.sum()

    # Document and term lookups, as produced by _create_lookups.
    token_docs, token_terms = self._create_lookups(X)

    # Run the sampler; arguments are positional and must stay in this order.
    fitted = gibbs_sampler_blslda(
        self.n_iter,
        self.n_report_iter,
        self.n_topics,
        self.n_docs,
        self.n_terms,
        self.n_tokens,
        self.alpha,
        self.beta,
        self.mu,
        self.nu2,
        self.b,
        token_docs,
        token_terms,
        np.ascontiguousarray(y, dtype=np.float64),
        self.seed,
    )
    self.theta, self.phi, self.eta, self.loglikelihoods = fitted
评论列表
文章目录