def __init__(self, y_coo, num_factor, bias_scale, factor_scale, weight=None):
if weight is None:
weight = np.ones(y_coo.data.size)
self.y_coo = y_coo
self.y_csr = scipy.sparse.csr_matrix(y_coo)
self.y_csc = scipy.sparse.csc_matrix(y_coo)
self.num_factor = num_factor
self.prior_param = {
'col_bias_scale': bias_scale,
'row_bias_scale': bias_scale,
'factor_scale': np.tile(factor_scale, self.num_factor),
'weight': weight,
'obs_df': float('inf'),
'param_df': float('inf'),
}
Source: matrix_factorization.py (project: probabilistic-matrix-factorization, author: aki-nishimura)
def _linear_phase(self, n_shift):
"""
Private: Select the center of FOV
"""
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
numpy.array(n_shift) +
numpy.array(self.st['Nd']) / 2)
phase = numpy.exp(
1.0j *
numpy.sum(
om * numpy.tile(
final_shifts,
(M, 1)),
1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
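# A hedged, standalone sketch of the phase computation above (the toy values
# below are made up for illustration): each of the M k-space samples om[m]
# receives the unit-modulus linear phase exp(1j * <om[m], n_shift + Nd/2>),
# which shifts the image-domain center of the FOV.
import numpy

M, Nd, n_shift = 4, (8, 8), (1, -2)
om = numpy.random.uniform(-numpy.pi, numpy.pi, (M, 2))
final_shifts = numpy.array(n_shift) + numpy.array(Nd) / 2
phase = numpy.exp(1.0j * numpy.sum(om * numpy.tile(final_shifts, (M, 1)), 1))
print(numpy.allclose(numpy.abs(phase), 1.0))  # True: a pure phase, magnitude 1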
def precompute(self):
# CSR_W = cuda_cffi.cusparse.CSR.to_CSR(self.st['W_gpu'],diag_type=True)
# Dia_W_cpu = scipy.sparse.dia_matrix( (self.st['M'], self.st['M']),dtype=dtype)
# Dia_W_cpu = scipy.sparse.dia_matrix( ( self.st['W'], 0 ), shape=(self.st['M'], self.st['M']) )
# Dia_W_cpu = scipy.sparse.diags(self.st['W'], format="csr", dtype=dtype)
# CSR_W = cuda_cffi.cusparse.CSR.to_CSR(Dia_W_cpu)
self.st['pHp_gpu'] = self.CSRH.gemm(self.CSR)
self.st['pHp'] = self.st['pHp_gpu'].get()
print('untrimmed', self.st['pHp'].nnz)
self.truncate_selfadjoint(1e-5)
print('trimmed', self.st['pHp'].nnz)
self.st['pHp_gpu'] = cuda_cffi.cusparse.CSR.to_CSR(self.st['pHp'])
# self.st['pHWp_gpu'] = self.CSR.conj().gemm(CSR_W,transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_TRANSPOSE)
# self.st['pHWp_gpu'] = self.st['pHWp_gpu'].gemm(self.CSR, transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE)
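# A hedged CPU analogue of the precompute step above, using SciPy instead of
# cusparse: form the self-adjoint Gram matrix p^H p, then drop near-zero
# entries the way truncate_selfadjoint(1e-5) does (the threshold is assumed
# from the call above).
import scipy.sparse

p = scipy.sparse.random(50, 30, density=0.1, format='csr') + \
    1j * scipy.sparse.random(50, 30, density=0.1, format='csr')
pHp = p.getH().tocsr().dot(p)           # Gram matrix p^H p, still sparse
print('untrimmed', pHp.nnz)
pHp.data[abs(pHp.data) < 1e-5] = 0      # truncate tiny entries
pHp.eliminate_zeros()
print('trimmed', pHp.nnz)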
def linear_phase(self, n_shift):
'''
Select the center of FOV
'''
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
numpy.array(n_shift) +
numpy.array(self.st['Nd']) / 2)
phase = numpy.exp(
1.0j *
numpy.sum(
om * numpy.tile(
final_shifts,
(M, 1)),
1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
# multiply by the diagonal linear phase before the gridding matrix
def finalization(self):
'''
Add sparse matrix multiplication on GPU
Note: uses the "python-cuda-cffi"-generated interface to access cusparse
'''
self.gpu_flag = 0
self.CSR = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].astype(dtype))
self.CSRH = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].getH().tocsr().astype(dtype))
self.scikit_plan = cu_fft.Plan(self.st['Kd'], dtype, dtype)
# self.pHp = cuda_cffi.cusparse.CSR.to_CSR(
# self.st['pHp'].astype(dtype))
self.gpu_flag = 1
self.sn_gpu = pycuda.gpuarray.to_gpu(self.sn.astype(dtype))
# tmp_array = skcuda.misc.ones((numpy.prod(self.st['Kd']),1),dtype=dtype)
# tmp = cuda_cffi.cusolver.csrlsvqr(self.CSR, tmp_array)
def _linear_phase(self, n_shift):
"""
Private: Select the center of FOV
"""
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
    numpy.array(n_shift) +
    numpy.array(self.Nd) / 2)
phase = numpy.exp(
    1.0j *
    numpy.sum(
        om * numpy.tile(
            final_shifts,
            (M, 1)),
        1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
return 0 # shifted sparse matrix
def _linear_phase(self, n_shift):
"""
Private: Select the center of FOV
"""
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
numpy.array(n_shift) +
numpy.array(self.st['Nd']) / 2)
phase = numpy.exp(
1.0j *
numpy.sum(
om * numpy.tile(
final_shifts,
(M, 1)),
1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
Source: matrix_factorization.py (project: probabilistic-matrix-factorization, author: aki-nishimura)
def prepare_matrix(val, row_var, col_var):
# Takes a vector of observed values and two categorical variables
# and returns a sparse matrix in COO format that can be used to
# instantiate the class. Also returned are dictionaries that map the
# row and column categories to matrix indices.
#
# Params:
#   val: numpy array of observed values
#   row_var, col_var: categorical arrays of the same length as val; they
#       must support .unique() (e.g. pandas Series), as used below
row_id = row_var.unique()
col_id = col_var.unique()
nrow = row_id.size
ncol = col_id.size
# Associate each of the unique id names to a row and column index.
row_id_map = {row_id[index]: index for index in range(len(row_id))}
col_id_map = {col_id[index]: index for index in range(len(col_id))}
row_indices = np.array([row_id_map[id] for id in row_var])
col_indices = np.array([col_id_map[id] for id in col_var])
y_coo = scipy.sparse.coo_matrix((val, (row_indices, col_indices)), shape=(nrow, ncol))
return y_coo, row_id_map, col_id_map
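# A hedged usage sketch of prepare_matrix: the class name in the last comment
# is hypothetical (the snippet does not show it), but the flow, pandas Series
# in, COO matrix and id maps out, follows directly from the code above.
import numpy as np
import pandas as pd

ratings = pd.DataFrame({
    'user': ['a', 'a', 'b', 'c'],
    'item': ['x', 'y', 'y', 'z'],
    'score': [5.0, 3.0, 4.0, 1.0],
})
y_coo, row_map, col_map = prepare_matrix(
    ratings['score'].values, ratings['user'], ratings['item'])
print(y_coo.shape)   # (3, 3): three unique users, three unique items
# model = MatrixFactorization(y_coo, num_factor=2, bias_scale=1.0,
#                             factor_scale=1.0)  # hypothetical class name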
def sparse(size, *args):
"""
Create a sparse vector, using either a dictionary, a list of
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).
:param size: Size of the vector.
:param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
SparseVector(4, {1: 1.0, 3: 5.5})
"""
return SparseVector(size, *args)
def _equals(v1_indices, v1_values, v2_indices, v2_values):
"""
Check equality between sparse/dense vectors,
v1_indices and v2_indices are assumed to be strictly increasing.
"""
v1_size = len(v1_values)
v2_size = len(v2_values)
k1 = 0
k2 = 0
all_equal = True
while all_equal:
while k1 < v1_size and v1_values[k1] == 0:
k1 += 1
while k2 < v2_size and v2_values[k2] == 0:
k2 += 1
if k1 >= v1_size or k2 >= v2_size:
return k1 >= v1_size and k2 >= v2_size
all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
k1 += 1
k2 += 1
return all_equal
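# A minimal check of the zero-skipping semantics above, assuming _equals is in
# scope: explicitly stored zeros are ignored, so a vector that stores a zero at
# index 0 still equals one that omits it.
print(_equals([1, 3], [1.0, 5.5], [0, 1, 3], [0.0, 1.0, 5.5]))  # True
print(_equals([1], [1.0], [2], [1.0]))                          # False: indices differ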
def test_ml_mllib_vector_conversion(self):
# to ml
# dense
mllibDV = Vectors.dense([1, 2, 3])
mlDV1 = newlinalg.Vectors.dense([1, 2, 3])
mlDV2 = mllibDV.asML()
self.assertEqual(mlDV2, mlDV1)
# sparse
mllibSV = Vectors.sparse(4, {1: 1.0, 3: 5.5})
mlSV1 = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
mlSV2 = mllibSV.asML()
self.assertEqual(mlSV2, mlSV1)
# from ml
# dense
mllibDV1 = Vectors.dense([1, 2, 3])
mlDV = newlinalg.Vectors.dense([1, 2, 3])
mllibDV2 = Vectors.fromML(mlDV)
self.assertEqual(mllibDV1, mllibDV2)
# sparse
mllibSV1 = Vectors.sparse(4, {1: 1.0, 3: 5.5})
mlSV = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
mllibSV2 = Vectors.fromML(mlSV)
self.assertEqual(mllibSV1, mllibSV2)
def test_serialize(self):
from scipy.sparse import lil_matrix
lil = lil_matrix((4, 1))
lil[1, 0] = 1
lil[3, 0] = 2
sv = SparseVector(4, {1: 1, 3: 2})
self.assertEqual(sv, _convert_to_vector(lil))
self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
self.assertEqual(sv, _convert_to_vector(lil.todok()))
def serialize(l):
return ser.loads(ser.dumps(_convert_to_vector(l)))
self.assertEqual(sv, serialize(lil))
self.assertEqual(sv, serialize(lil.tocsc()))
self.assertEqual(sv, serialize(lil.tocsr()))
self.assertEqual(sv, serialize(lil.todok()))
def sparse(size, *args):
"""
Create a sparse vector, using either a dictionary, a list of
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).
:param size: Size of the vector.
:param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
SparseVector(4, {1: 1.0, 3: 5.5})
"""
return SparseVector(size, *args)
def _equals(v1_indices, v1_values, v2_indices, v2_values):
"""
Check equality between sparse/dense vectors,
v1_indices and v2_indices are assumed to be strictly increasing.
"""
v1_size = len(v1_values)
v2_size = len(v2_values)
k1 = 0
k2 = 0
all_equal = True
while all_equal:
while k1 < v1_size and v1_values[k1] == 0:
k1 += 1
while k2 < v2_size and v2_values[k2] == 0:
k2 += 1
if k1 >= v1_size or k2 >= v2_size:
return k1 >= v1_size and k2 >= v2_size
all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
k1 += 1
k2 += 1
return all_equal
def get(self, stream=None):
"""Returns a copy of the array on host memory.
Args:
stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
copy runs asynchronously. Otherwise, the copy is synchronous.
Returns:
scipy.sparse.coo_matrix: Copy of the array on host memory.
"""
if not _scipy_available:
raise RuntimeError('scipy is not available')
data = self.data.get(stream)
row = self.row.get(stream)
col = self.col.get(stream)
return scipy.sparse.coo_matrix(
(data, (row, col)), shape=self.shape)
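# A hedged round-trip sketch, assuming a CUDA device and the (older)
# cupy.sparse namespace this snippet targets; newer CuPy exposes the same
# classes as cupyx.scipy.sparse.
import numpy as np
import scipy.sparse
import cupy

a_cpu = scipy.sparse.random(5, 4, density=0.3, format='coo')
a_gpu = cupy.sparse.coo_matrix(a_cpu)   # upload to the device
b_cpu = a_gpu.get()                     # copy back via the method above
print(np.allclose(a_cpu.toarray(), b_cpu.toarray()))  # True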
def transpose(self, axes=None, copy=False):
"""Returns a transpose matrix.
Args:
axes: This option is not supported.
copy (bool): If ``True``, a returned matrix shares no data.
Otherwise, it shares data arrays as much as possible.
Returns:
cupy.sparse.spmatrix: Transpose matrix.
"""
if axes is not None:
raise ValueError(
'Sparse matrices do not support an \'axes\' parameter because '
'swapping dimensions is the only logical permutation.')
shape = self.shape[1], self.shape[0]
return coo_matrix(
(self.data, (self.col, self.row)), shape=shape, copy=copy)
def get(self, stream=None):
"""Returns a copy of the array on host memory.
.. warning::
You need to install SciPy to use this method.
Args:
stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
copy runs asynchronously. Otherwise, the copy is synchronous.
Returns:
scipy.sparse.csc_matrix: Copy of the array on host memory.
"""
if not _scipy_available:
raise RuntimeError('scipy is not available')
data = self.data.get(stream)
indices = self.indices.get(stream)
indptr = self.indptr.get(stream)
return scipy.sparse.csc_matrix(
(data, indices, indptr), shape=self._shape)
def tocsr(self, copy=False):
"""Converts the matrix to Compressed Sparse Row format.
Args:
copy (bool): If ``False``, it shares data arrays as much as
possible. In practice this option is ignored, because no
arrays can be shared in the CSR-to-CSC conversion.
Returns:
cupy.sparse.csr_matrix: Converted matrix.
"""
return self.T.tocsc(copy=False).T
# TODO(unno): Implement todia
# TODO(unno): Implement todok
# TODO(unno): Implement tolil
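# The double-transpose trick above, demonstrated on the host with SciPy:
# transposing reinterprets CSC buffers as CSR (and vice versa) without
# copying, so only the inner tocsc() does real work.
import scipy.sparse

m = scipy.sparse.random(4, 6, density=0.4, format='csc')
r = m.T.tocsc(copy=False).T             # same identity as tocsr() above
print((r != m.tocsr()).nnz == 0)        # True: identical matrices, CSR format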
def transpose(self, axes=None, copy=False):
"""Returns a transpose matrix.
Args:
axes: This option is not supported.
copy (bool): If ``True``, a returned matrix shares no data.
Otherwise, it shares data arrays as much as possible.
Returns:
cupy.sparse.spmatrix: Transpose matrix.
"""
if axes is not None:
raise ValueError(
'Sparse matrices do not support an \'axes\' parameter because '
'swapping dimensions is the only logical permutation.')
shape = self.shape[1], self.shape[0]
return cupy.sparse.csr_matrix(
(self.data, self.indices, self.indptr), shape=shape, copy=copy)
def get(self, stream=None):
"""Returns a copy of the array on host memory.
Args:
stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
copy runs asynchronously. Otherwise, the copy is synchronous.
Returns:
scipy.sparse.csr_matrix: Copy of the array on host memory.
"""
if not _scipy_available:
raise RuntimeError('scipy is not available')
data = self.data.get(stream)
indices = self.indices.get(stream)
indptr = self.indptr.get(stream)
return scipy.sparse.csr_matrix(
(data, indices, indptr), shape=self._shape)
def tocoo(self, copy=False):
"""Converts the matrix to COOdinate format.
Args:
copy (bool): If ``False``, it shares data arrays as much as
possible.
Returns:
cupy.sparse.coo_matrix: Converted matrix.
"""
if copy:
data = self.data.copy()
indices = self.indices.copy()
else:
data = self.data
indices = self.indices
return cusparse.csr2coo(self, data, indices)
def comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu, xy_gpu, Ftpy_gpu, f_gpu, L_gpu, alpha, beta, gamma=0):
"""
Computes the division in Fourier space needed for sparse deconvolution
"""
sfft = xx_gpu.shape
block_size = (16, 16, 1)
grid_size = (int(np.ceil(np.float32(sfft[0]*sfft[1])/block_size[0])),
int(np.ceil(np.float32(sfft[2])/block_size[1])))
mod = cu.module_from_buffer(cubin)
comp_ola_sdeconv_Kernel = mod.get_function("comp_ola_sdeconv_Kernel")
z_gpu = cua.zeros(sfft, np.complex64)
comp_ola_sdeconv_Kernel(z_gpu.gpudata,
np.int32(sfft[0]), np.int32(sfft[1]), np.int32(sfft[2]),
gx_gpu.gpudata, gy_gpu.gpudata,
xx_gpu.gpudata, xy_gpu.gpudata,
Ftpy_gpu.gpudata, f_gpu.gpudata, L_gpu.gpudata,
np.float32(alpha), np.float32(beta),
np.float32(gamma),
block=block_size, grid=grid_size)
return z_gpu
def iter_chunks(self, chunksize=None):
"""
Iteratively yield the index as chunks of documents, each of size <= chunksize.
The chunk is returned in its raw form (matrix or sparse matrix slice).
The size of the chunk may be smaller than requested; it is up to the caller
to check the result for real length, using `chunk.shape[0]`.
"""
self.close_shard()
if chunksize is None:
# if not explicitly specified, use the chunksize from the constructor
chunksize = self.chunksize
for shard in self.shards:
query = shard.get_index().index
for chunk_start in xrange(0, query.shape[0], chunksize):
# scipy.sparse doesn't allow slicing beyond the real size of the
# matrix (unlike numpy), so clip the end of the chunk explicitly
# to keep scipy.sparse happy
chunk_end = min(query.shape[0], chunk_start + chunksize)
chunk = query[chunk_start: chunk_end] # create a view
yield chunk
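# A hedged usage sketch, assuming this method lives on gensim's Similarity
# index (where iter_chunks is defined); chunked iteration keeps peak memory
# bounded regardless of index size.
from gensim import corpora
from gensim.similarities import Similarity

docs = [['sparse', 'matrix'], ['dense', 'vector'], ['sparse', 'vector']]
dictionary = corpora.Dictionary(docs)
corpus = [dictionary.doc2bow(d) for d in docs]
index = Similarity('/tmp/sim_shard', corpus, num_features=len(dictionary))
for chunk in index.iter_chunks(chunksize=2):
    print(chunk.shape[0])               # 2, then 1: the last chunk is short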
def extend_2pol_npol(x, npol):
if npol == 2:
return x
if scipy.sparse.isspmatrix_dia(x):
y = scipy.sparse.diags(extend_2pol_npol(x.diagonal(), npol))
elif len(x.shape) == 1:
y = np.zeros(len(x)//2*npol)
y[0::npol] = x[0::2]
y[1::npol] = x[1::2]
elif len(x.shape) == 2:
y = np.zeros((x.shape[0]//2*npol, x.shape[1]//2*npol))
y[0::npol, 0::npol] = x[0::2, 0::2]
y[0::npol, 1::npol] = x[0::2, 1::2]
y[1::npol, 0::npol] = x[1::2, 0::2]
y[1::npol, 1::npol] = x[1::2, 1::2]
else:
raise SMRTError("should never be here")
return y
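# A small worked example of the extension above: each 2-pol block gains a
# third, zero-filled polarization slot.
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])      # two 2-pol blocks
print(extend_2pol_npol(x, 3))           # [1. 2. 0. 3. 4. 0.]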
def csc_matvec(mat_csc, vec, dense_output=True, dtype=None):
v_nnz = vec.indices
v_val = vec.data
m_val = mat_csc.data
m_ind = mat_csc.indices
m_ptr = mat_csc.indptr
res_dtype = dtype or np.result_type(mat_csc.dtype, vec.dtype)
if dense_output:
res = np.zeros((mat_csc.shape[0],), dtype=res_dtype)
matvec2dense(m_ptr, m_ind, m_val, v_nnz, v_val, res)
else:
sizes = m_ptr.take(v_nnz+1) - m_ptr.take(v_nnz)
sizes = np.concatenate(([0], np.cumsum(sizes)))
n = sizes[-1]
data = np.empty((n,), dtype=res_dtype)
indices = np.empty((n,), dtype=np.intp)
indptr = np.array([0, n], dtype=np.intp)
matvec2sparse(m_ptr, m_ind, m_val, v_nnz, v_val, sizes, indices, data)
res = sp.sparse.csr_matrix((data, indices, indptr), shape=(1, mat_csc.shape[0]), dtype=res_dtype)
res.sum_duplicates() # expensive operation
return res
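# A hedged consistency check, assuming the compiled kernels matvec2dense and
# matvec2sparse used by csc_matvec are importable from the same module: the
# result should match SciPy's own matrix times sparse-vector product.
import numpy as np
import scipy.sparse as sp

mat = sp.random(6, 5, density=0.4, format='csc')
vec = sp.random(1, 5, density=0.6, format='csr')      # sparse row vector
dense = csc_matvec(mat, vec, dense_output=True)
print(np.allclose(dense, mat.dot(vec.T).toarray().ravel()))  # True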
def _sparse_dot(self, tst_mat, i2i_mat):
# scipy always returns a sparse result, even if the dot product is actually
# dense; this function offers a solution to that problem. It also takes care
# of the sparse result w.r.t. further processing.
if self.dense_output: # calculate dense result directly
# TODO implement matmat multiplication instead of iteration with matvec
res_type = np.result_type(i2i_mat.dtype, tst_mat.dtype)
scores = np.empty((tst_mat.shape[0], i2i_mat.shape[1]), dtype=res_type)
for i in xrange(tst_mat.shape[0]):
v = tst_mat.getrow(i)
scores[i, :] = csc_matvec(i2i_mat, v, dense_output=True, dtype=res_type)
else:
scores = tst_mat.dot(i2i_mat.T)
# NOTE even though not necessary for a symmetric i2i matrix, the
# transpose helps avoid an expensive conversion to CSR (performed by scipy)
if scores.nnz > NNZ_MAX:
# too many nnz lead to undesired memory overhead in downvote_seen_items
scores = scores.toarray(order='C')
return scores
Source: select_percentile_classification.py (project: AutoML-Challenge, author: postech-mlg-exbrain)
def fit(self, X, y):
import scipy.sparse
import sklearn.feature_selection
self.preprocessor = sklearn.feature_selection.SelectPercentile(
score_func=self.score_func,
percentile=self.percentile)
# Because the pipeline guarantees that each feature is positive,
# clip all values below zero to zero
if self.score_func == sklearn.feature_selection.chi2:
if scipy.sparse.issparse(X):
X.data[X.data < 0] = 0.0
else:
X[X<0] = 0.0
self.preprocessor.fit(X, y)
return self
def transform(self, X):
import scipy.sparse
import sklearn.feature_selection
# Because the pipeline guarantees that each feature is positive,
# clip all values below zero to zero
if self.score_func == sklearn.feature_selection.chi2:
if scipy.sparse.issparse(X):
X.data[X.data < 0] = 0.0
else:
X[X < 0] = 0.0
if self.preprocessor is None:
raise NotImplementedError()
Xt = self.preprocessor.transform(X)
if Xt.shape[1] == 0:
raise ValueError(
"%s removed all features." % self.__class__.__name__)
return Xt
def get_hyperparameter_search_space(dataset_properties=None):
percentile = UniformFloatHyperparameter(
name="percentile", lower=1, upper=99, default=50)
score_func = CategoricalHyperparameter(
name="score_func", choices=["chi2", "f_classif"], default="chi2")
if dataset_properties is not None:
# Chi2 can handle sparse data, so we respect this
if 'is_sparse' in dataset_properties and dataset_properties['is_sparse']:
score_func = Constant(
name="score_func", value="chi2")
cs = ConfigurationSpace()
cs.add_hyperparameter(percentile)
cs.add_hyperparameter(score_func)
return cs
def fit(self, X, y):
import scipy.sparse
import sklearn.feature_selection
self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect(
score_func=self.score_func, param=self.alpha, mode=self.mode)
# Because the pipeline guarantees that each feature is positive,
# clip all values below zero to zero
if self.score_func == sklearn.feature_selection.chi2:
if scipy.sparse.issparse(X):
X.data[X.data < 0] = 0.0
else:
X[X < 0] = 0.0
self.preprocessor.fit(X, y)
return self
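# A hedged standalone sketch of what the fit/transform pair above does, using
# scikit-learn directly: clip negative entries (chi2 requires non-negative
# features), then keep the top percentile of features by chi2 score.
import numpy as np
import scipy.sparse
import sklearn.feature_selection

X = scipy.sparse.random(100, 20, density=0.3, format='csr')
X.data[X.data < 0] = 0.0                 # chi2 needs non-negative input
y = np.random.randint(0, 2, size=100)
selector = sklearn.feature_selection.SelectPercentile(
    score_func=sklearn.feature_selection.chi2, percentile=50)
Xt = selector.fit_transform(X, y)
print(Xt.shape)                          # (100, 10): roughly half the features kept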