def _fit(self, v_pos):
"""Inner fit for one mini-batch.
Adjust the parameters to maximize the likelihood of v using
Stochastic Maximum Likelihood (SML).
v_pos : array-like, shape (n_samples, n_features)
The data to use for training.
"""
    h_pos = self._mean_hiddens(v_pos)
    # TODO: Worth trying with visible probabilities rather than binary states.
    # PG: 'it is common to use p_i instead of sampling a binary value' ... 'it
    # reduces sampling noise thus allowing faster learning. There is some
    # evidence that it leads to slightly worse density models.'
    # I'm confounded by the fact that we seem to get more effective models
    # WITHOUT softmax visible units. The only explanation I can think of is
    # that it's like a pseudo-version of using visible probabilities. Without
    # softmax, v_neg can have multiple 1s per one-hot vector, which maybe
    # somehow accelerates learning? Need to think about this some more.
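    # Negative phase: sample binary visible states from the persistent
    # fantasy particles. A minimal sketch of the probability variant
    # discussed in the TODO above could look like the hypothetical line
    # below, assuming a helper that returns p(v=1 | h) instead of samples:
    #   v_neg = self._mean_visibles(self.h_samples_)  # hypothetical helper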
    v_neg = self._sample_visibles(self.h_samples_)
    h_neg = self._mean_hiddens(v_neg)
    lr = float(self.learning_rate) / v_pos.shape[0]
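    # SML weight gradient estimate: positive statistics from the data minus
    # negative statistics from the fantasy particles, with the negative terms
    # rescaled by fantasy_to_batch (by its name, the ratio of fantasy
    # particles to mini-batch size) so the two phases are comparable.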
    update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T
    update -= np.dot(h_neg.T, v_neg) / self.fantasy_to_batch
    # L2 weight penalty
    update -= self.components_ * self.weight_cost
    self.components_ += lr * update
    self.intercept_hidden_ += lr * (
        h_pos.sum(axis=0) - h_neg.sum(axis=0) / self.fantasy_to_batch)
    self.intercept_visible_ += lr * (
        np.asarray(v_pos.sum(axis=0)).squeeze() -
        v_neg.sum(axis=0) / self.fantasy_to_batch)
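    # Persist the chain: sample binary hidden states from h_neg in place and
    # keep them as the SML/PCD fantasy particles for the next mini-batch.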
    h_neg[self.rng_.uniform(size=h_neg.shape) < h_neg] = 1.0  # sample binomial
    self.h_samples_ = np.floor(h_neg, h_neg)