def _fit(self, v_pos):
"""Inner fit for one mini-batch.
Adjust the parameters to maximize the likelihood of v using
Stochastic Maximum Likelihood (SML).
v_pos : array-like, shape (n_samples, n_features)
The data to use for training.
"""
    h_pos = self._mean_hiddens(v_pos)
    # TODO: Worth trying with visible probabilities rather than binary states.
    # PG: 'it is common to use p_i instead of sampling a binary value' ... 'it
    # reduces sampling noise thus allowing faster learning. There is some
    # evidence that it leads to slightly worse density models.'
    # I'm confounded by the fact that we seem to get more effective models
    # WITHOUT softmax visible units. The only explanation I can think of is
    # that it's like a pseudo-version of using visible probabilities. Without
    # softmax, v_neg can have multiple 1s per one-hot vector, which maybe
    # somehow accelerates learning? Need to think about this some more.
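    # Negative phase: sample binary visible states from the persistent
    # fantasy particles. A minimal sketch of the probability variant
    # discussed in the TODO above could look like the hypothetical line
    # below, assuming a helper that returns p(v=1 | h) instead of samples:
    #   v_neg = self._mean_visibles(self.h_samples_)  # hypothetical helper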
    v_neg = self._sample_visibles(self.h_samples_)
    h_neg = self._mean_hiddens(v_neg)
    lr = float(self.learning_rate) / v_pos.shape[0]
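    # SML weight gradient estimate: positive statistics from the data minus
    # negative statistics from the fantasy particles, with the negative terms
    # rescaled by fantasy_to_batch (by its name, the ratio of fantasy
    # particles to mini-batch size) so the two phases are comparable.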
    update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T
    update -= np.dot(h_neg.T, v_neg) / self.fantasy_to_batch
    # L2 weight penalty
    update -= self.components_ * self.weight_cost
    self.components_ += lr * update
    self.intercept_hidden_ += lr * (
        h_pos.sum(axis=0) - h_neg.sum(axis=0) / self.fantasy_to_batch)
    self.intercept_visible_ += lr * (
        np.asarray(v_pos.sum(axis=0)).squeeze() -
        v_neg.sum(axis=0) / self.fantasy_to_batch)
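    # Persist the chain: sample binary hidden states from h_neg in place and
    # keep them as the SML/PCD fantasy particles for the next mini-batch.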
    h_neg[self.rng_.uniform(size=h_neg.shape) < h_neg] = 1.0  # sample binomial
    self.h_samples_ = np.floor(h_neg, h_neg)