ppr.py 文件源码-python代码片段

def preprocess(self, filepath):
        """Remember: row-first ordered csv file only!"""
        self.pp("reading")
        self.node2index, H = read_matrix(filepath, d=-self.d, add_identity=True)
        self.n, _ = H.shape
        if self.t is None:
            self.t = np.power(self.n, -0.5)
        elif self.t == 0:
            self.exact = True
        if self.k is None:
            self.k = max(1, int(0.001 * self.n))
        self.pp("running slashburn")
        self.perm_H, wing = slashburn(H, self.k, self.greedy)
        self.body = self.n - wing
        self.pp("sorting H")
        H = reorder_matrix(H, self.perm_H)
        self.pp("partitioning H")
        H11, H12, H21, H22 = matrix_partition(H, self.body)
        del H
        H11 = H11.tocsc()
        self.pp("computing LU decomposition on H11")
        if self.exact:
            self.LU1 = splu(H11)
        else:
            self.LU1 = spilu(H11, drop_tol=self.t)
        del H11
        self.pp("computing LU1 solve")
        S = H22 - H21 @ self.LU1.solve(H12.toarray())
        S = coo_matrix(S)
        self.pp("sorting S")
        self.perm_S = degree_reverse_rank_perm(S)
        S = reorder_matrix(S, self.perm_S)
        self.H12 = reorder_matrix(H12, self.perm_S, fix_row=True)
        self.H21 = reorder_matrix(H21, self.perm_S, fix_col=True)
        S = S.tocsc()
        del H12, H21, H22
        self.pp("computing LU decomposition on S")
        if self.exact:
            self.LU2 = splu(S)
        else:
            self.LU2 = spilu(S, drop_tol=self.t)
        # issue: this approximation drops accuracy way too much! why?
        """
        if not self.exact:
            H12 = drop_tolerance(self.H12, self.t)
            del self.H12
            self.H12 = H12
            H21 = drop_tolerance(self.H21, self.t)
            del self.H21
            self.H21 = H21
        """
        del S