def update_chunk(self, chunk, update=True, opt_o=True):
    # Find the unique words in this chunk...
    unique_words = dict()
    word_list = []
    for doc in chunk:
        for word_id, _ in doc:
            if word_id not in unique_words:
                unique_words[word_id] = len(unique_words)
                word_list.append(word_id)

    Wt = len(word_list)  # number of unique words seen in this chunk

    # ...and do the lazy updates on the necessary columns of lambda
    rw = np.array([self.m_r[t] for t in self.m_timestamp[word_list]])
    self.m_lambda[:, word_list] *= np.exp(self.m_r[-1] - rw)
    self.m_Elogbeta[:, word_list] = \
        sp.psi(self.m_eta + self.m_lambda[:, word_list]) - \
        sp.psi(self.m_W * self.m_eta + self.m_lambda_sum[:, np.newaxis])

    ss = SuffStats(self.m_T, Wt, len(chunk))

    Elogsticks_1st = expect_log_sticks(self.m_var_sticks)  # global sticks

    # run variational inference on some new docs
    score = 0.0
    count = 0
    for doc in chunk:
        if len(doc) > 0:
            doc_word_ids, doc_word_counts = zip(*doc)
            doc_score = self.doc_e_step(
                doc, ss, Elogsticks_1st,
                word_list, unique_words, doc_word_ids,
                doc_word_counts, self.m_var_converge)
            count += sum(doc_word_counts)
            score += doc_score

    if update:
        self.update_lambda(ss, word_list, opt_o)

    return (score, count)
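
This reads like the `update_chunk` method of gensim's `HdpModel` (online HDP). The class definition, the `np`/`sp` imports (`numpy` and, presumably, `scipy.special`), and the `SuffStats` / `expect_log_sticks` helpers live in the surrounding module and are not shown above. A minimal usage sketch, under that assumption and with a toy corpus:

# Usage sketch -- assumes the method above belongs to gensim's HdpModel;
# the toy corpus below is illustrative only.
from gensim.corpora import Dictionary
from gensim.models import HdpModel

texts = [["human", "interface", "computer"],
         ["survey", "user", "computer", "system", "response", "time"],
         ["graph", "minors", "trees"]]
dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]  # docs as (word_id, count) pairs

# The constructor / update() split the corpus into chunks and call update_chunk
# on each; a chunk can also be processed directly:
hdp = HdpModel(corpus=corpus, id2word=dictionary, chunksize=256)
score, count = hdp.update_chunk(corpus, update=True, opt_o=True)
print(score / count)  # per-word variational bound on this chunk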