# Assumes the usual module-level imports for this Chainer model:
#   import numpy as np
#   import chainer.functions as F
#   import chainer.links as L
def __init__(self, n_features=None, n_dim=8, lossfun=F.mean_squared_error,
             lambda0=1, lambda1=1, lambda2=1, init_bias_mu=0.0,
             init_bias_lv=0.0, intx_term=True, total_nobs=1):
    # n_features sizes the EmbedID tables below, so it must be supplied.
    assert n_features is not None, 'n_features is required'
    self.n_dim = n_dim
    self.n_features = n_features
    self.lossfun = lossfun
    self.lambda0 = lambda0
    self.lambda1 = lambda1
    self.lambda2 = lambda2
    self.intx_term = intx_term
    self.total_nobs = total_nobs
    # In contrast to the FM model, the slopes and latent vectors
    # will have means (mu) and log variances (lv) for each component.
    super(VFM, self).__init__(bias_mu=L.Bias(shape=(1,)),
                              bias_lv=L.Bias(shape=(1,)),
                              slop_mu=L.Bias(shape=(1, 1)),
                              slop_lv=L.Bias(shape=(1, 1)),
                              slop_delta_mu=L.EmbedID(n_features, 1,
                                                      ignore_label=-1),
                              slop_delta_lv=L.EmbedID(n_features, 1,
                                                      ignore_label=-1),
                              feat_mu_vec=L.Bias(shape=(1, 1, n_dim)),
                              feat_lv_vec=L.Bias(shape=(1, 1, n_dim)),
                              feat_delta_mu=L.EmbedID(n_features, n_dim,
                                                      ignore_label=-1),
                              feat_delta_lv=L.EmbedID(n_features, n_dim,
                                                      ignore_label=-1))
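    # Hierarchical parameterization: each shared global term (the Bias
    # links) is paired with per-feature deltas (the EmbedID lookups).
    # An assumption implied by this structure, not shown in this excerpt:
    # the forward pass presumably combines them per feature i as
    #   slope_i ~ N(slop_mu + slop_delta_mu[i],
    #               exp(slop_lv + slop_delta_lv[i]))
    # and analogously for the latent feature vectors.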
    # Xavier-style initialization (scaled by the square root of the fan),
    # shrunk by a further factor of 1e3 so the per-feature deltas start
    # as tiny perturbations around the shared global terms.
    c = np.sqrt(n_features * n_dim) * 1e3
    d = np.sqrt(n_features) * 1e3
    self.feat_delta_mu.W.data[...] = np.random.randn(n_features, n_dim) / c
    self.feat_delta_lv.W.data[...] = np.random.randn(n_features, n_dim) / c
    self.slop_delta_mu.W.data[...] = np.random.randn(n_features, 1) / d
    self.slop_delta_lv.W.data[...] = np.random.randn(n_features, 1) / d
    # Set the global bias mean and log variance to their initial values.
    self.bias_mu.b.data[...] = init_bias_mu
    self.bias_lv.b.data[...] = init_bias_lv
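
# A minimal usage sketch (assumptions: this __init__ belongs to a
# chainer.Chain subclass named VFM, as the super() call implies, and the
# class also defines the forward/loss computation not shown here):
#
#     model = VFM(n_features=1000, n_dim=8, total_nobs=100000)
#     optimizer = chainer.optimizers.Adam()
#     optimizer.setup(model)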