def _beta_update_raw_tfidf(self):
    '''
    Fill ``self.beta`` with smoothed, TF-IDF-weighted word counts.

    Run only once - it does not depend on other parameters.

    For every node id ``d`` in ``range(self.D)``:

    * term count: column-wise sum of the rows of ``self.W`` whose
      ``self.node_vec`` entry equals ``d``;
    * document count: number of those rows holding a value ``>= 1`` in
      the column, plus one (add-one smoothing);
    * weight: ``1 + log(self.W.shape[0] / document count)``.

    ``self.beta[d]`` receives term count * weight. Afterwards 1 is added
    to every entry of ``self.beta`` and ``self._normalize_beta_rowwise()``
    is called.

    Returns:
        ``self.beta`` (the same object that was updated in place).
    '''
    total_docs = self.W.shape[0]
    # `range` instead of `xrange`: works on both Python 2 and Python 3.
    for nodeid in range(self.D):
        # Select this node's rows once instead of once per word.
        node_rows = self.W[self.node_vec == nodeid, :]
        self.beta[nodeid] = node_rows.sum(axis=0)
        # Per-word count of this node's rows containing the word.
        # asarray/ravel keeps the arithmetic below element-wise even if
        # the reduction returns a 1 x V matrix rather than a flat array.
        docs_cnt = np.asarray((node_rows >= 1).sum(axis=0)).ravel()
        docs_cnt = docs_cnt + 1  # smooth by adding one
        # 1+ because we still want to keep words which always occur,
        # but probably it never happens
        idf = 1 + np.log(total_docs * 1. / docs_cnt)
        # Plain assignment (not `*=`) so the result is cast to beta's
        # dtype exactly as the former element-by-element update did.
        self.beta[nodeid] = self.beta[nodeid] * idf
    # Laplace smoothing to avoid zeros!
    self.beta += 1
    self._normalize_beta_rowwise()
    return self.beta
# Web-page navigation residue ("Comment list", "Article contents") - not part of the code.