def score_samples(self, X):
    """Compute the pseudo-likelihood of X.

    Parameters
    ----------
    X : {array-like, sparse matrix} shape (n_samples, n_features)
        Values of the visible layer. Must be all-boolean (not checked).

    Returns
    -------
    pseudo_likelihood : array-like, shape (n_samples,)
        Value of the pseudo-likelihood (proxy for likelihood).

    Notes
    -----
    This method is not deterministic: it computes a quantity called the
    free energy on X, then on a randomly corrupted version of X, and
    returns the log of the logistic function of the difference.
    """
    check_is_fitted(self, "components_")
    v = check_array(X, accept_sparse='csr')
    fe = self._free_energy(v)
    # corrupt() returns the corrupted visible vectors plus the state
    # needed to undo the corruption; uncorrupt() restores v afterwards.
    # NOTE(review): the original computed the corrupted free energy on
    # `v`, relying on corrupt() mutating it in place. Using the returned
    # `v_` is correct regardless of whether corruption is in-place.
    v_, state = self.corrupt(v)
    fe_corrupted = self._free_energy(v_)
    self.uncorrupt(v, state)
    # See https://en.wikipedia.org/wiki/Pseudolikelihood
    # Let x be some visible vector. x_i is the ith entry. x_-i is the vector except that entry.
    # x_iflipped is x with the ith bit flipped. F() is free energy.
    # P(x_i | x_-i) = P(x) / P(x_-i) = P(x) / (P(x) + p(x_iflipped))
    # expand def'n of P(x), cancel out the partition function on each term, and divide top and bottom by e^{-F(x)} to get...
    # 1 / (1 + e^{F(x) - F(x_iflipped)})
    # So we're just calculating the log of that. We multiply by the number of
    # visible units because we're approximating P(x) as the product of the conditional likelihood
    # of each individual unit. But we're too lazy to do each one individually, so we say the unit
    # we tested represents an average.
    if hasattr(self, 'codec'):
        # Normalize by the size of the (pre-encoding) visible space when a
        # codec is attached; otherwise by the raw number of visible units.
        normalizer = self.codec.shape()[0]
    else:
        normalizer = v.shape[1]
    return normalizer * log_logistic(fe_corrupted - fe)
# TODO: No longer used