Example source code for Python's sparse() (scipy.sparse)

data.py (project: GVIN, author: sufengniu)
import numpy as np
import scipy.sparse
from scipy.sparse import csc_matrix
from tqdm import tqdm


def theta_matrix(coord, adj, preload=True, train=True):
    print("creating adjacency theta matrix ...")
    if preload:
        if train:
            theta_matrix = np.load('../data/theta_matrix_train_n_100.npy')
        else:
            theta_matrix = np.load('../data/theta_matrix_test_n_100.npy')
    else:
        theta_matrix = []
        for i in tqdm(range(coord.shape[0])):
            for j in range(coord.shape[1]):
                # angle() is a project-local helper: angles from node j to each of its neighbors
                col_indice = adj[i][j].nonzero()[1]
                theta_row = angle(coord[i, col_indice, :] - coord[i, j, :])
                row_indice = np.zeros(col_indice.shape[0]).astype(np.int32)
                if j == 0:
                    theta_matrix_tmp = csc_matrix((theta_row, (row_indice, col_indice)), shape=(1, coord.shape[1]))
                else:
                    theta_matrix_tmp = scipy.sparse.vstack((theta_matrix_tmp, csc_matrix((theta_row, (row_indice, col_indice)), shape=(1, coord.shape[1]))))
            theta_matrix.append(theta_matrix_tmp)
        theta_matrix = np.array(theta_matrix)  # object array of per-graph sparse matrices
    return theta_matrix
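The core scipy.sparse pattern here is building one-row CSC matrices from (data, (row, col)) triplets and stacking them. A minimal standalone sketch (toy indices, not the GVIN data):

import numpy as np
import scipy.sparse
from scipy.sparse import csc_matrix

n_cols = 5
rows = []
for j in range(3):
    col_indices = np.array([j, (j + 2) % n_cols])   # nonzero columns of this row
    row_indices = np.zeros(col_indices.shape[0], dtype=np.int32)
    values = np.array([1.0, 2.0])
    rows.append(csc_matrix((values, (row_indices, col_indices)), shape=(1, n_cols)))

stacked = scipy.sparse.vstack(rows)   # a sparse (3, 5) matrix
print(stacked.toarray())

Note that calling vstack inside the loop, as theta_matrix does, is quadratic in the number of rows; collecting the rows in a list and stacking once is cheaper.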
docsim.py (project: topical_word_embeddings, author: thunlp)
def iter_chunks(self, chunksize=None):
        """
        Iteratively yield the index as chunks of documents, each of size <= chunksize.

        The chunk is returned in its raw form (matrix or sparse matrix slice).
        The size of the chunk may be smaller than requested; it is up to the caller
        to check the result for real length, using `chunk.shape[0]`.
        """
        self.close_shard()

        if chunksize is None:
            # if not explicitly specified, use the chunksize from the constructor
            chunksize = self.chunksize

        for shard in self.shards:
            query = shard.get_index().index
            for chunk_start in range(0, query.shape[0], chunksize):
                # scipy.sparse doesn't allow slicing beyond real size of the matrix
                # (unlike numpy). so, clip the end of the chunk explicitly to make
                # scipy.sparse happy
                chunk_end = min(query.shape[0], chunk_start + chunksize)
                chunk = query[chunk_start: chunk_end] # create a view
                yield chunk
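The chunking idiom in isolation: slice a sparse index in fixed-size steps, clipping the final chunk. A short sketch with a random CSR matrix:

import scipy.sparse

index = scipy.sparse.random(10, 4, density=0.5, format='csr', random_state=0)
chunksize = 3
for chunk_start in range(0, index.shape[0], chunksize):
    chunk_end = min(index.shape[0], chunk_start + chunksize)
    chunk = index[chunk_start:chunk_end]
    print(chunk_start, chunk_end, chunk.shape)   # the last chunk has only 1 row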
docsim.py (project: topical_word_embeddings, author: thunlp)
def close_shard(self):
        """
        Force the latest shard to close (be converted to a matrix and stored
        to disk). Do nothing if no new documents added since last call.

        **NOTE**: the shard is closed even if it is not full yet (its size is smaller
        than `self.shardsize`). If documents are added later via `add_documents()`,
        this incomplete shard will be loaded again and completed.
        """
        if not self.fresh_docs:
            return
        shardid = len(self.shards)
        # consider the shard sparse if its density is < 30%
        issparse = 1.0 * self.fresh_nnz / (len(self.fresh_docs) * self.num_features) < 0.3
        if issparse:
            index = SparseMatrixSimilarity(self.fresh_docs, num_terms=self.num_features,
                                           num_docs=len(self.fresh_docs), num_nnz=self.fresh_nnz)
        else:
            index = MatrixSimilarity(self.fresh_docs, num_features=self.num_features)
        logger.info("creating %s shard #%s" % ('sparse' if issparse else 'dense', shardid))
        shard = Shard(self.shardid2filename(shardid), index)
        shard.num_best = self.num_best
        shard.num_nnz = self.fresh_nnz
        self.shards.append(shard)
        self.fresh_docs, self.fresh_nnz = [], 0
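The density heuristic in isolation, with made-up shard statistics:

num_docs, num_features, num_nnz = 1000, 500, 120000
density = 1.0 * num_nnz / (num_docs * num_features)
issparse = density < 0.3
print(density, issparse)   # 0.24 True, so a sparse similarity index is chosen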
balAPinc.py (project: UnsupervisedHypernymy, author: vered1986)
def get_contexts_rank(targets, cooc_mat, target_index):
    """
    Return a dictionary in which each key is a target word and the value is a list of
    its context columns, sorted by co-occurrence value in descending order.
    :param targets: the target words
    :param cooc_mat: the co-occurrence matrix
    :param target_index: a map from target word to its row index in cooc_mat
    :return: the contexts_rank dictionary
    """
    contexts_rank = {}

    for target in targets:
        index = target_index.get(target, -1)

        if index == -1:
            contexts_rank[target] = []
            continue  # without this, index -1 would silently select the last row

        row = cooc_mat[index, :]
        contexts_rank[target] = sort_by_value_get_col(scipy.sparse.coo_matrix(row.mat))  # context columns sorted by value

    return contexts_rank
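sort_by_value_get_col is a project helper not shown in this excerpt; a plausible minimal implementation, assuming it returns the column indices of a one-row COO matrix sorted by value in descending order:

import numpy as np
import scipy.sparse

def sort_by_value_get_col(coo_row):
    # hypothetical reconstruction: columns ordered by descending value
    order = np.argsort(coo_row.data)[::-1]
    return coo_row.col[order].tolist()

row = scipy.sparse.coo_matrix(np.array([[0.0, 3.0, 0.0, 1.0, 2.0]]))
print(sort_by_value_get_col(row))   # [1, 4, 3]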
APSyn.py (project: UnsupervisedHypernymy, author: vered1986)
def APSyn(x_row, y_row, N):
    """
    APSyn(x, y) = \sum_{f \in N(x) \cap N(y)} \frac{1}{(rank_x(f) + rank_y(f)) / 2}
    :param x_row: x's row in the co-occurrence matrix
    :param y_row: y's row in the co-occurrence matrix
    :param N: the number of top contexts to consider
    :return: the APSyn similarity score
    """

    # Sort y's contexts by value and keep the top N
    y_contexts_cols = sort_by_value_get_col(scipy.sparse.coo_matrix(y_row.mat))
    y_contexts_cols = y_contexts_cols[:N]
    y_context_rank = {c: i + 1 for i, c in enumerate(y_contexts_cols)}

    # Sort x's contexts by value and keep the top N
    x_contexts_cols = sort_by_value_get_col(scipy.sparse.coo_matrix(x_row.mat))
    x_contexts_cols = x_contexts_cols[:N]
    x_context_rank = {c: i + 1 for i, c in enumerate(x_contexts_cols)}

    # Sum 1 / ((rank(f_x) + rank(f_y)) / 2) over every feature shared by the top N contexts
    intersected_context = set(y_contexts_cols).intersection(set(x_contexts_cols))
    score = sum(1.0 / ((x_context_rank[c] + y_context_rank[c]) / 2.0) for c in intersected_context)
    #score *= (1.0 / N)

    return score
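A worked example of the score: suppose contexts 'a' and 'b' appear in both top-N lists, with 'a' at rank 1 for x and 2 for y, and 'b' at rank 3 for x and 1 for y:

x_context_rank = {'a': 1, 'b': 3, 'c': 2}
y_context_rank = {'a': 2, 'b': 1, 'd': 3}
shared = set(x_context_rank) & set(y_context_rank)   # {'a', 'b'}
score = sum(1.0 / ((x_context_rank[c] + y_context_rank[c]) / 2.0) for c in shared)
print(score)   # 1/1.5 + 1/2.0 = 1.1666...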
temporal.py (project: SCaIP, author: simonsfoundation)
import numpy as np
from scipy.sparse import spdiags


def make_G_matrix(T, g):
    '''Create the autoregression matrix that enforces the indicator dynamics.

    Inputs:
    T: positive integer
        number of time-bins
    g: np.ndarray, vector p x 1
        discrete time constants

    Output:
    G: sparse banded matrix
        matrix of autoregression
    '''
    if isinstance(g, np.ndarray):
        if len(g) == 1 and g[0] < 0:
            g = np.zeros(1)  # clamp a single negative time constant to zero

        # first coefficient is the main diagonal (1); the remaining bands hold -g
        gs = np.matrix(np.hstack((1, -(g[:]).T)))
        ones_ = np.matrix(np.ones((T, 1)))
        G = spdiags((ones_ * gs).T, range(0, -len(g) - 1, -1), T, T)

        return G
    else:
        raise Exception('g must be an array')
#%%
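A quick check of make_G_matrix as fixed above (with np and spdiags imported as shown): for g = [0.5], row t of G encodes c[t] - 0.5*c[t-1].

G = make_G_matrix(4, np.array([0.5]))
print(G.toarray())
# [[ 1.   0.   0.   0. ]
#  [-0.5  1.   0.   0. ]
#  [ 0.  -0.5  1.   0. ]
#  [ 0.   0.  -0.5  1. ]]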
tools.py (project: document_classification, author: scotthlee)
def process(self, df, x_name, y_name=None, ngrams=2, max_features=35000, method='counts', binary=True, sparse=False):
        # choosing the particular flavor of vectorizer
        if method == 'counts':
            vectorizer = CountVectorizer(max_features=max_features, ngram_range=(1, ngrams), decode_error='replace', binary=binary)
        elif method == 'tfidf':
            vectorizer = TfidfVectorizer(max_features=max_features, ngram_range=(1, ngrams), decode_error='replace')

        # fitting the vectorizer and converting the counts to a dense array
        full_fit = vectorizer.fit_transform(df[x_name])
        full_counts = full_fit.toarray()
        self.vocabulary_ = vectorizer.vocabulary_

        # passing the attributes up to the class instance
        self.data = df
        if sparse:
            # fit_transform already returned a sparse matrix, so densifying and
            # re-sparsifying is wasteful; kept as in the original
            full_counts = csr_matrix(full_counts)
        self.X = full_counts
        if y_name is not None:
            self.y = np.array(df[y_name])
        return

    # splits the data into training and test sets; either called from process()
    # or on its own when your text is already vectorized and divided into x and y
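The vectorizer call at the core of process(), in isolation (sklearn's CountVectorizer; the DataFrame column is illustrative):

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

df = pd.DataFrame({'text': ['the cat sat', 'the cat sat twice', 'dogs bark']})
vectorizer = CountVectorizer(max_features=35000, ngram_range=(1, 2),
                             decode_error='replace', binary=True)
full_fit = vectorizer.fit_transform(df['text'])   # sparse (3, n_terms) matrix
print(full_fit.shape)
print(sorted(vectorizer.vocabulary_)[:5])         # a few vocabulary terms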
scipy_sparse.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def _sparse_series_to_coo(ss, row_levels=(0, ), column_levels=(1, ),
                          sort_labels=False):
    """ Convert a SparseSeries to a scipy.sparse.coo_matrix using index
    levels row_levels, column_levels as the row and column
    labels respectively. Returns the sparse_matrix, row and column labels.
    """

    import scipy.sparse

    if ss.index.nlevels < 2:
        raise ValueError('to_coo requires MultiIndex with nlevels >= 2')
    if not ss.index.is_unique:
        raise ValueError('Duplicate index entries are not allowed in to_coo '
                         'transformation.')

    # to keep things simple, only rely on integer indexing (not labels)
    row_levels = [ss.index._get_level_number(x) for x in row_levels]
    column_levels = [ss.index._get_level_number(x) for x in column_levels]

    v, i, j, rows, columns = _to_ijv(ss, row_levels=row_levels,
                                     column_levels=column_levels,
                                     sort_labels=sort_labels)
    sparse_matrix = scipy.sparse.coo_matrix(
        (v, (i, j)), shape=(len(rows), len(columns)))
    return sparse_matrix, rows, columns
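The scipy.sparse call at the heart of the conversion, in isolation: parallel value/row/column arrays become a COO matrix.

import scipy.sparse

v = [1.0, 2.0, 3.0]   # values
i = [0, 1, 2]         # integer row positions
j = [0, 0, 1]         # integer column positions
m = scipy.sparse.coo_matrix((v, (i, j)), shape=(3, 2))
print(m.toarray())
# [[1. 0.]
#  [2. 0.]
#  [0. 3.]]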
testing.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def _create_missing_idx(nrows, ncols, density, random_state=None):
    if random_state is None:
        random_state = np.random
    else:
        random_state = np.random.RandomState(random_state)

    # below is cribbed from scipy.sparse
    size = int(np.round((1 - density) * nrows * ncols))
    # generate a few more to ensure unique values
    min_rows = 5
    fac = 1.02
    extra_size = min(size + min_rows, fac * size)

    def _gen_unique_rand(rng, _extra_size):
        ind = rng.rand(int(_extra_size))
        return np.unique(np.floor(ind * nrows * ncols))[:size]

    ind = _gen_unique_rand(random_state, extra_size)
    while ind.size < size:
        extra_size *= 1.05
        ind = _gen_unique_rand(random_state, extra_size)

    j = np.floor(ind * 1. / nrows).astype(int)
    i = (ind - j * nrows).astype(int)
    return i.tolist(), j.tolist()
_sparse_tools.py (project: FermiLib, author: ProjectQ-Framework)
def jw_number_restrict_operator(operator, n_electrons, n_qubits=None):
    """Restrict a Jordan-Wigner encoded operator to a given particle number

    Args:
        operator(ndarray or sparse): numpy or scipy.sparse operator acting on
            the space of n_qubits.
        n_electrons(int): Number of particles to restrict the operator to
        n_qubits(int): Number of qubits defining the total state

    Returns:
        new_operator(ndarray or sparse): Numpy operator restricted to
            acting on states with the same particle number.
    """
    if n_qubits is None:
        n_qubits = int(numpy.log2(operator.shape[0]))

    select_indices = jw_number_indices(n_electrons, n_qubits)
    return operator[numpy.ix_(select_indices, select_indices)]
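The indexing pattern above in isolation: numpy.ix_ builds an open mesh so a single expression selects a row/column submatrix, and scipy's CSC/CSR matrices accept this form of fancy indexing.

import numpy
import scipy.sparse

op = scipy.sparse.csc_matrix(numpy.arange(16).reshape(4, 4))
sub = op[numpy.ix_([1, 3], [1, 3])]
print(sub.toarray())
# [[ 5  7]
#  [13 15]]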
_sparse_tools.py (project: FermiLib, author: ProjectQ-Framework)
def get_ground_state(sparse_operator):
    """Compute lowest eigenvalue and eigenstate.

    Returns:
        eigenvalue: The lowest eigenvalue, a float.
        eigenstate: The lowest eigenstate in scipy.sparse csc format.
    """
    if not is_hermitian(sparse_operator):
        raise ValueError('sparse_operator must be Hermitian.')

    values, vectors = scipy.sparse.linalg.eigsh(
        sparse_operator, 2, which='SA', maxiter=1e7)

    eigenstate = scipy.sparse.csc_matrix(vectors[:, 0])
    eigenvalue = values[0]
    return eigenvalue, eigenstate.getH()
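A small usage sketch: eigsh with which='SA' returns the algebraically smallest eigenvalues of a Hermitian sparse matrix (a toy diagonal operator here):

import scipy.sparse
import scipy.sparse.linalg

H = scipy.sparse.diags([3.0, -1.0, 2.0, 0.5]).tocsc()   # Hermitian by construction
values, vectors = scipy.sparse.linalg.eigsh(H, k=2, which='SA')
print(values)          # approximately [-1.   0.5]
print(vectors[:, 0])   # eigenvector of the lowest eigenvalue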
lsi.py (project: lazy-semantic-indexing, author: o19s)
def _getCscMatrix(self):  # compressed sparse column matrix
        if self._cscMatrix is not None:
            return self._cscMatrix
        # data and indices are parallel arrays: data stores the values
        # (i.e. tf*idf) and indices stores the corresponding row (term) indices
        num_nnz, data, indices, indptr = 0, [], [], [0]
        for termVector in self._termVectors:
            newIndices = [i for i in termVector[1].keys()]
            newValues = [v for v in termVector[1].values()]
            indices.extend(newIndices)
            data.extend(newValues)
            num_nnz += len(newValues)
            indptr.append(num_nnz)
        data = numpy.asarray(data)
        indices = numpy.asarray(indices)
        # compressed sparse column matrix
        # Rows terms, column docs
        #
        #        doc1   doc2   doc3
        # 'the'    1      1     1
        # 'cat'    1      0     2
        self._cscMatrix = scipy.sparse.csc_matrix((data, indices, indptr),
                shape=(self.numTerms, self.numDocs))
        return self._cscMatrix
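The three-array CSC constructor used above, in isolation: for column k, data[indptr[k]:indptr[k+1]] holds the values and indices[indptr[k]:indptr[k+1]] the row numbers. This reproduces the 'the'/'cat' example from the comment.

import numpy
import scipy.sparse

data = numpy.asarray([1.0, 1.0, 1.0, 1.0, 2.0])
indices = numpy.asarray([0, 1, 0, 0, 1])   # term (row) index of each value
indptr = [0, 2, 3, 5]                      # column boundaries for 3 docs
m = scipy.sparse.csc_matrix((data, indices, indptr), shape=(2, 3))
print(m.toarray())
# [[1. 1. 1.]
#  [1. 0. 2.]]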
engine.py (project: TensorGlue, author: Evfro)
def svd_recommender(self):
        userid, itemid, contextid, values = self.fields
        test_idx = (self.test.testset[userid].values,
                    self.test.testset[itemid].values)
        if contextid:
            #TODO: refactor it! need to think about dependence on self.arrange_by and contextid
            #values are contextualized already
            test_val = self.test.testset[values].values
        else:
            test_val = self.test.testset[values].values

        v = self._items_factors
        test_shp = (self.test.testset[userid].max()+1,
                    v.shape[1])

        test_matrix = sp.sparse.coo_matrix((test_val, test_idx),
                                           shape=test_shp,
                                           dtype=np.float64).tocsr()

        svd_scores = (test_matrix.dot(v.T)).dot(v)
        return svd_scores
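A compact, self-contained sketch of the scoring step, assuming _items_factors holds a rank-by-items factor matrix v (the V^T of an SVD): project the sparse user-item test matrix onto the factor space and back.

import numpy as np
import scipy.sparse as sp

rank, n_items, n_users = 10, 50, 20
v = np.random.rand(rank, n_items)                              # factors x items
test_matrix = sp.random(n_users, n_items, density=0.1, format='csr')
svd_scores = test_matrix.dot(v.T).dot(v)                       # users x items
print(svd_scores.shape)                                        # (20, 50)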
tests.py (project: pyspark, author: v-v-vishnevskiy)
def test_serialize(self):
        from scipy.sparse import lil_matrix
        lil = lil_matrix((4, 1))
        lil[1, 0] = 1
        lil[3, 0] = 2
        sv = SparseVector(4, {1: 1, 3: 2})
        self.assertEqual(sv, _convert_to_vector(lil))
        self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
        self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
        self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
        self.assertEqual(sv, _convert_to_vector(lil.todok()))

        def serialize(l):
            return ser.loads(ser.dumps(_convert_to_vector(l)))
        self.assertEqual(sv, serialize(lil))
        self.assertEqual(sv, serialize(lil.tocsc()))
        self.assertEqual(sv, serialize(lil.tocsr()))
        self.assertEqual(sv, serialize(lil.todok()))
.pynufft_cpu.py (project: pynufft, author: jyhmiinlin)
def _matvec(self, x_vec):
        '''
        Matrix-vector product provided for scipy.sparse.linalg;
        a wrapper of self.forward().
        '''

        x2 = numpy.reshape(x_vec, self.st['Nd'], order='F')

        return self.forward(x2)
transform_cpu.py (project: pynufft, author: jyhmiinlin)
def _matvec(self, x_vec):
        """
        (To be tested): dot operation provided for scipy.sparse.linalg
        wrapper of self.forward()
        """

        x2 = numpy.reshape(x_vec, self.Nd, order='F')

        return self.forward(x2)
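Why the _matvec wrappers exist (in both pynufft excerpts above): scipy.sparse.linalg solvers only need a matrix-vector product. A minimal sketch of the same idea with LinearOperator, using a stand-in product rather than the real NUFFT forward transform:

import numpy
import scipy.sparse.linalg

def forward(x_vec):
    return 2.0 * x_vec   # stand-in for the NUFFT forward transform

A = scipy.sparse.linalg.LinearOperator((8, 8), matvec=forward)
b = numpy.ones(8)
print(A.matvec(b))                       # [2. 2. ...]
x, info = scipy.sparse.linalg.cg(A, b)   # solvers only call the matvec
print(x[:3], info)                       # approximately [0.5 0.5 0.5] 0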
sepfilter.py (project: pyrsss, author: butala)
def asmatrix(self):
        """
        Return the sparse matrix representation of the separable filter.
        """
        h_matrix = NP.array([1])
        for i in range(self.ndim):
            if self.mode == 'circ':
                h_i = Convmtx([self.k[i]], self.h_list[i], mode=self.mode)
            else:
                h_i = Convmtx([self.n[i]], self.h_list[i], mode=self.mode)
            h_matrix = scipy.sparse.kron(h_matrix, h_i)
        return h_matrix
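The separable-filter identity in isolation: the matrix of an n-D separable filter is the Kronecker product of its 1-D factor matrices, built up exactly as in the loop above (Convmtx is project-specific, so plain sparse factors stand in):

import numpy as np
import scipy.sparse

h_rows = scipy.sparse.csr_matrix(np.array([[1.0, 2.0], [0.0, 1.0]]))   # 1-D factor
h_cols = scipy.sparse.csr_matrix(np.eye(3))                            # 1-D factor
h_matrix = np.array([1])                       # seed, as in asmatrix()
h_matrix = scipy.sparse.kron(h_matrix, h_rows)
h_matrix = scipy.sparse.kron(h_matrix, h_cols)
print(h_matrix.shape)                          # (6, 6)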

