def __init__(self, y_coo, num_factor, bias_scale, factor_scale, weight=None):
if weight is None:
weight = np.ones(y_coo.data.size)
self.y_coo = y_coo
self.y_csr = scipy.sparse.csr_matrix(y_coo)
self.y_csc = scipy.sparse.csc_matrix(y_coo)
self.num_factor = num_factor
self.prior_param = {
'col_bias_scale': bias_scale,
'row_bias_scale': bias_scale,
'factor_scale': np.tile(factor_scale, self.num_factor),
'weight': weight,
'obs_df': float('inf'),
'param_df': float('inf'),
}
Source: matrix_factorization.py (project: probabilistic-matrix-factorization, author: aki-nishimura)
def _linear_phase(self, n_shift):
"""
Private: Select the center of FOV
"""
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
numpy.array(n_shift) +
numpy.array(self.st['Nd']) / 2)
phase = numpy.exp(
1.0j *
numpy.sum(
om * numpy.tile(
final_shifts,
(M, 1)),
1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
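# A hedged, standalone sketch of the phase computation above (the toy values
# below are made up for illustration): each of the M k-space samples om[m]
# receives the unit-modulus linear phase exp(1j * <om[m], n_shift + Nd/2>),
# which shifts the image-domain center of the FOV.
import numpy

M, Nd, n_shift = 4, (8, 8), (1, -2)
om = numpy.random.uniform(-numpy.pi, numpy.pi, (M, 2))
final_shifts = numpy.array(n_shift) + numpy.array(Nd) / 2
phase = numpy.exp(1.0j * numpy.sum(om * numpy.tile(final_shifts, (M, 1)), 1))
print(numpy.allclose(numpy.abs(phase), 1.0))  # True: a pure phase, magnitude 1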
def precompute(self):
# CSR_W = cuda_cffi.cusparse.CSR.to_CSR(self.st['W_gpu'],diag_type=True)
# Dia_W_cpu = scipy.sparse.dia_matrix( (self.st['M'], self.st['M']),dtype=dtype)
# Dia_W_cpu = scipy.sparse.dia_matrix( ( self.st['W'], 0 ), shape=(self.st['M'], self.st['M']) )
# Dia_W_cpu = scipy.sparse.diags(self.st['W'], format="csr", dtype=dtype)
# CSR_W = cuda_cffi.cusparse.CSR.to_CSR(Dia_W_cpu)
self.st['pHp_gpu'] = self.CSRH.gemm(self.CSR)
self.st['pHp'] = self.st['pHp_gpu'].get()
print('untrimmed', self.st['pHp'].nnz)
self.truncate_selfadjoint(1e-5)
print('trimmed', self.st['pHp'].nnz)
self.st['pHp_gpu'] = cuda_cffi.cusparse.CSR.to_CSR(self.st['pHp'])
# self.st['pHWp_gpu'] = self.CSR.conj().gemm(CSR_W,transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_TRANSPOSE)
# self.st['pHWp_gpu'] = self.st['pHWp_gpu'].gemm(self.CSR, transA=cuda_cffi.cusparse.CUSPARSE_OPERATION_NON_TRANSPOSE)
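# A hedged CPU analogue of the precompute step above, using SciPy instead of
# cusparse: form the self-adjoint Gram matrix p^H p, then drop near-zero
# entries the way truncate_selfadjoint(1e-5) does (the threshold is assumed
# from the call above).
import scipy.sparse

p = scipy.sparse.random(50, 30, density=0.1, format='csr') + \
    1j * scipy.sparse.random(50, 30, density=0.1, format='csr')
pHp = p.getH().tocsr().dot(p)           # Gram matrix p^H p, still sparse
print('untrimmed', pHp.nnz)
pHp.data[abs(pHp.data) < 1e-5] = 0      # truncate tiny entries
pHp.eliminate_zeros()
print('trimmed', pHp.nnz)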
def linear_phase(self, n_shift):
'''
Select the center of FOV
'''
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
numpy.array(n_shift) +
numpy.array(self.st['Nd']) / 2)
phase = numpy.exp(
1.0j *
numpy.sum(
om * numpy.tile(
final_shifts,
(M, 1)),
1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
# multiply by the diagonal linear phase before the gridding matrix
def finalization(self):
'''
Add sparse matrix multiplication on GPU
Note: uses the "python-cuda-cffi"-generated interface to access cusparse
'''
self.gpu_flag = 0
self.CSR = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].astype(dtype))
self.CSRH = cuda_cffi.cusparse.CSR.to_CSR(self.st['p'].getH().tocsr().astype(dtype))
self.scikit_plan = cu_fft.Plan(self.st['Kd'], dtype, dtype)
# self.pHp = cuda_cffi.cusparse.CSR.to_CSR(
# self.st['pHp'].astype(dtype))
self.gpu_flag = 1
self.sn_gpu = pycuda.gpuarray.to_gpu(self.sn.astype(dtype))
# tmp_array = skcuda.misc.ones((numpy.prod(self.st['Kd']),1),dtype=dtype)
# tmp = cuda_cffi.cusolver.csrlsvqr(self.CSR, tmp_array)
def _linear_phase(self, n_shift):
"""
Private: Select the center of FOV
"""
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
    numpy.array(n_shift) +
    numpy.array(self.Nd) / 2)
phase = numpy.exp(
    1.0j *
    numpy.sum(
        om * numpy.tile(
            final_shifts,
            (M, 1)),
        1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
return 0 # shifted sparse matrix
def _linear_phase(self, n_shift):
"""
Private: Select the center of FOV
"""
om = self.st['om']
M = self.st['M']
final_shifts = tuple(
numpy.array(n_shift) +
numpy.array(self.st['Nd']) / 2)
phase = numpy.exp(
1.0j *
numpy.sum(
om * numpy.tile(
final_shifts,
(M, 1)),
1))
# add up the linear phases along all axes
self.st['p'] = scipy.sparse.diags(phase, 0).dot(self.st['p0'])
Source: matrix_factorization.py (project: probabilistic-matrix-factorization, author: aki-nishimura)
def prepare_matrix(val, row_var, col_var):
# Takes a vector of observed values and two categorical variables
# and returns a sparse matrix in COO format that can be used to
# instantiate the class. Also returned are dictionaries that map the
# row and column categories to matrix indices.
#
# Params:
#   val: numpy array of observed values
#   row_var, col_var: categorical arrays of the same length as val; they
#       must support .unique() (e.g. pandas Series), as used below
row_id = row_var.unique()
col_id = col_var.unique()
nrow = row_id.size
ncol = col_id.size
# Associate each of the unique id names to a row and column index.
row_id_map = {row_id[index]: index for index in range(len(row_id))}
col_id_map = {col_id[index]: index for index in range(len(col_id))}
row_indices = np.array([row_id_map[id] for id in row_var])
col_indices = np.array([col_id_map[id] for id in col_var])
y_coo = scipy.sparse.coo_matrix((val, (row_indices, col_indices)), shape=(nrow, ncol))
return y_coo, row_id_map, col_id_map
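# A hedged usage sketch of prepare_matrix: the class name in the last comment
# is hypothetical (the snippet does not show it), but the flow, pandas Series
# in, COO matrix and id maps out, follows directly from the code above.
import numpy as np
import pandas as pd

ratings = pd.DataFrame({
    'user': ['a', 'a', 'b', 'c'],
    'item': ['x', 'y', 'y', 'z'],
    'score': [5.0, 3.0, 4.0, 1.0],
})
y_coo, row_map, col_map = prepare_matrix(
    ratings['score'].values, ratings['user'], ratings['item'])
print(y_coo.shape)   # (3, 3): three unique users, three unique items
# model = MatrixFactorization(y_coo, num_factor=2, bias_scale=1.0,
#                             factor_scale=1.0)  # hypothetical class name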
def sparse(size, *args):
"""
Create a sparse vector, using either a dictionary, a list of
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).
:param size: Size of the vector.
:param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
SparseVector(4, {1: 1.0, 3: 5.5})
"""
return SparseVector(size, *args)
def _equals(v1_indices, v1_values, v2_indices, v2_values):
"""
Check equality between sparse/dense vectors,
v1_indices and v2_indices are assumed to be strictly increasing.
"""
v1_size = len(v1_values)
v2_size = len(v2_values)
k1 = 0
k2 = 0
all_equal = True
while all_equal:
while k1 < v1_size and v1_values[k1] == 0:
k1 += 1
while k2 < v2_size and v2_values[k2] == 0:
k2 += 1
if k1 >= v1_size or k2 >= v2_size:
return k1 >= v1_size and k2 >= v2_size
all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
k1 += 1
k2 += 1
return all_equal
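# A minimal check of the zero-skipping semantics above, assuming _equals is in
# scope: explicitly stored zeros are ignored, so a vector that stores a zero at
# index 0 still equals one that omits it.
print(_equals([1, 3], [1.0, 5.5], [0, 1, 3], [0.0, 1.0, 5.5]))  # True
print(_equals([1], [1.0], [2], [1.0]))                          # False: indices differ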
def test_ml_mllib_vector_conversion(self):
# to ml
# dense
mllibDV = Vectors.dense([1, 2, 3])
mlDV1 = newlinalg.Vectors.dense([1, 2, 3])
mlDV2 = mllibDV.asML()
self.assertEqual(mlDV2, mlDV1)
# sparse
mllibSV = Vectors.sparse(4, {1: 1.0, 3: 5.5})
mlSV1 = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
mlSV2 = mllibSV.asML()
self.assertEqual(mlSV2, mlSV1)
# from ml
# dense
mllibDV1 = Vectors.dense([1, 2, 3])
mlDV = newlinalg.Vectors.dense([1, 2, 3])
mllibDV2 = Vectors.fromML(mlDV)
self.assertEqual(mllibDV1, mllibDV2)
# sparse
mllibSV1 = Vectors.sparse(4, {1: 1.0, 3: 5.5})
mlSV = newlinalg.Vectors.sparse(4, {1: 1.0, 3: 5.5})
mllibSV2 = Vectors.fromML(mlSV)
self.assertEqual(mllibSV1, mllibSV2)
def test_serialize(self):
from scipy.sparse import lil_matrix
lil = lil_matrix((4, 1))
lil[1, 0] = 1
lil[3, 0] = 2
sv = SparseVector(4, {1: 1, 3: 2})
self.assertEqual(sv, _convert_to_vector(lil))
self.assertEqual(sv, _convert_to_vector(lil.tocsc()))
self.assertEqual(sv, _convert_to_vector(lil.tocoo()))
self.assertEqual(sv, _convert_to_vector(lil.tocsr()))
self.assertEqual(sv, _convert_to_vector(lil.todok()))
def serialize(l):
return ser.loads(ser.dumps(_convert_to_vector(l)))
self.assertEqual(sv, serialize(lil))
self.assertEqual(sv, serialize(lil.tocsc()))
self.assertEqual(sv, serialize(lil.tocsr()))
self.assertEqual(sv, serialize(lil.todok()))
def sparse(size, *args):
"""
Create a sparse vector, using either a dictionary, a list of
(index, value) pairs, or two separate arrays of indices and
values (sorted by index).
:param size: Size of the vector.
:param args: Non-zero entries, as a dictionary, list of tuples,
or two sorted lists containing indices and values.
>>> Vectors.sparse(4, {1: 1.0, 3: 5.5})
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)])
SparseVector(4, {1: 1.0, 3: 5.5})
>>> Vectors.sparse(4, [1, 3], [1.0, 5.5])
SparseVector(4, {1: 1.0, 3: 5.5})
"""
return SparseVector(size, *args)
def _equals(v1_indices, v1_values, v2_indices, v2_values):
"""
Check equality between sparse/dense vectors,
v1_indices and v2_indices are assumed to be strictly increasing.
"""
v1_size = len(v1_values)
v2_size = len(v2_values)
k1 = 0
k2 = 0
all_equal = True
while all_equal:
while k1 < v1_size and v1_values[k1] == 0:
k1 += 1
while k2 < v2_size and v2_values[k2] == 0:
k2 += 1
if k1 >= v1_size or k2 >= v2_size:
return k1 >= v1_size and k2 >= v2_size
all_equal = v1_indices[k1] == v2_indices[k2] and v1_values[k1] == v2_values[k2]
k1 += 1
k2 += 1
return all_equal
def get(self, stream=None):
"""Returns a copy of the array on host memory.
Args:
stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
copy runs asynchronously. Otherwise, the copy is synchronous.
Returns:
scipy.sparse.coo_matrix: Copy of the array on host memory.
"""
if not _scipy_available:
raise RuntimeError('scipy is not available')
data = self.data.get(stream)
row = self.row.get(stream)
col = self.col.get(stream)
return scipy.sparse.coo_matrix(
(data, (row, col)), shape=self.shape)
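# A hedged round-trip sketch, assuming a CUDA device and the (older)
# cupy.sparse namespace this snippet targets; newer CuPy exposes the same
# classes as cupyx.scipy.sparse.
import numpy as np
import scipy.sparse
import cupy

a_cpu = scipy.sparse.random(5, 4, density=0.3, format='coo')
a_gpu = cupy.sparse.coo_matrix(a_cpu)   # upload to the device
b_cpu = a_gpu.get()                     # copy back via the method above
print(np.allclose(a_cpu.toarray(), b_cpu.toarray()))  # True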
def transpose(self, axes=None, copy=False):
"""Returns a transpose matrix.
Args:
axes: This option is not supported.
copy (bool): If ``True``, a returned matrix shares no data.
Otherwise, it shares data arrays as much as possible.
Returns:
cupy.sparse.spmatrix: Transpose matrix.
"""
if axes is not None:
raise ValueError(
'Sparse matrices do not support an \'axes\' parameter because '
'swapping dimensions is the only logical permutation.')
shape = self.shape[1], self.shape[0]
return coo_matrix(
(self.data, (self.col, self.row)), shape=shape, copy=copy)
def get(self, stream=None):
"""Returns a copy of the array on host memory.
.. warning::
You need to install SciPy to use this method.
Args:
stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
copy runs asynchronously. Otherwise, the copy is synchronous.
Returns:
scipy.sparse.csc_matrix: Copy of the array on host memory.
"""
if not _scipy_available:
raise RuntimeError('scipy is not available')
data = self.data.get(stream)
indices = self.indices.get(stream)
indptr = self.indptr.get(stream)
return scipy.sparse.csc_matrix(
(data, indices, indptr), shape=self._shape)
def tocsr(self, copy=False):
"""Converts the matrix to Compressed Sparse Row format.
Args:
copy (bool): If ``False``, it shares data arrays as much as
possible. In practice this option is ignored, because no
arrays can be shared in the CSR-to-CSC conversion.
Returns:
cupy.sparse.csr_matrix: Converted matrix.
"""
return self.T.tocsc(copy=False).T
# TODO(unno): Implement todia
# TODO(unno): Implement todok
# TODO(unno): Implement tolil
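# The double-transpose trick above, demonstrated on the host with SciPy:
# transposing reinterprets CSC buffers as CSR (and vice versa) without
# copying, so only the inner tocsc() does real work.
import scipy.sparse

m = scipy.sparse.random(4, 6, density=0.4, format='csc')
r = m.T.tocsc(copy=False).T             # same identity as tocsr() above
print((r != m.tocsr()).nnz == 0)        # True: identical matrices, CSR format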
def transpose(self, axes=None, copy=False):
"""Returns a transpose matrix.
Args:
axes: This option is not supported.
copy (bool): If ``True``, a returned matrix shares no data.
Otherwise, it shares data arrays as much as possible.
Returns:
cupy.sparse.spmatrix: Transpose matrix.
"""
if axes is not None:
raise ValueError(
'Sparse matrices do not support an \'axes\' parameter because '
'swapping dimensions is the only logical permutation.')
shape = self.shape[1], self.shape[0]
return cupy.sparse.csr_matrix(
(self.data, self.indices, self.indptr), shape=shape, copy=copy)
def get(self, stream=None):
"""Returns a copy of the array on host memory.
Args:
stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
copy runs asynchronously. Otherwise, the copy is synchronous.
Returns:
scipy.sparse.csr_matrix: Copy of the array on host memory.
"""
if not _scipy_available:
raise RuntimeError('scipy is not available')
data = self.data.get(stream)
indices = self.indices.get(stream)
indptr = self.indptr.get(stream)
return scipy.sparse.csr_matrix(
(data, indices, indptr), shape=self._shape)
def tocoo(self, copy=False):
"""Converts the matrix to COOdinate format.
Args:
copy (bool): If ``False``, it shares data arrays as much as
possible.
Returns:
cupy.sparse.coo_matrix: Converted matrix.
"""
if copy:
data = self.data.copy()
indices = self.indices.copy()
else:
data = self.data
indices = self.indices
return cusparse.csr2coo(self, data, indices)
def comp_ola_sdeconv(gx_gpu, gy_gpu, xx_gpu, xy_gpu, Ftpy_gpu, f_gpu, L_gpu, alpha, beta, gamma=0):
"""
Computes the division in Fourier space needed for sparse deconvolution
"""
sfft = xx_gpu.shape
block_size = (16, 16, 1)
grid_size = (int(np.ceil(np.float32(sfft[0]*sfft[1])/block_size[0])),
int(np.ceil(np.float32(sfft[2])/block_size[1])))
mod = cu.module_from_buffer(cubin)
comp_ola_sdeconv_Kernel = mod.get_function("comp_ola_sdeconv_Kernel")
z_gpu = cua.zeros(sfft, np.complex64)
comp_ola_sdeconv_Kernel(z_gpu.gpudata,
np.int32(sfft[0]), np.int32(sfft[1]), np.int32(sfft[2]),
gx_gpu.gpudata, gy_gpu.gpudata,
xx_gpu.gpudata, xy_gpu.gpudata,
Ftpy_gpu.gpudata, f_gpu.gpudata, L_gpu.gpudata,
np.float32(alpha), np.float32(beta),
np.float32(gamma),
block=block_size, grid=grid_size)
return z_gpu
def iter_chunks(self, chunksize=None):
"""
Iteratively yield the index as chunks of documents, each of size <= chunksize.
The chunk is returned in its raw form (matrix or sparse matrix slice).
The size of the chunk may be smaller than requested; it is up to the caller
to check the result for real length, using `chunk.shape[0]`.
"""
self.close_shard()
if chunksize is None:
# if not explicitly specified, use the chunksize from the constructor
chunksize = self.chunksize
for shard in self.shards:
query = shard.get_index().index
for chunk_start in xrange(0, query.shape[0], chunksize):
# scipy.sparse doesn't allow slicing beyond the real size of the
# matrix (unlike numpy), so clip the end of the chunk explicitly
# to keep scipy.sparse happy
chunk_end = min(query.shape[0], chunk_start + chunksize)
chunk = query[chunk_start: chunk_end] # create a view
yield chunk
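# A hedged usage sketch, assuming this method lives on gensim's Similarity
# index (where iter_chunks is defined); chunked iteration keeps peak memory
# bounded regardless of index size.
from gensim import corpora
from gensim.similarities import Similarity

docs = [['sparse', 'matrix'], ['dense', 'vector'], ['sparse', 'vector']]
dictionary = corpora.Dictionary(docs)
corpus = [dictionary.doc2bow(d) for d in docs]
index = Similarity('/tmp/sim_shard', corpus, num_features=len(dictionary))
for chunk in index.iter_chunks(chunksize=2):
    print(chunk.shape[0])               # 2, then 1: the last chunk is short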
def extend_2pol_npol(x, npol):
if npol == 2:
return x
if scipy.sparse.isspmatrix_dia(x):
y = scipy.sparse.diags(extend_2pol_npol(x.diagonal(), npol))
elif len(x.shape) == 1:
y = np.zeros(len(x)//2*npol)
y[0::npol] = x[0::2]
y[1::npol] = x[1::2]
elif len(x.shape) == 2:
y = np.zeros((x.shape[0]//2*npol, x.shape[1]//2*npol))
y[0::npol, 0::npol] = x[0::2, 0::2]
y[0::npol, 1::npol] = x[0::2, 1::2]
y[1::npol, 0::npol] = x[1::2, 0::2]
y[1::npol, 1::npol] = x[1::2, 1::2]
else:
raise SMRTError("should never be here")
return y
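# A small worked example of the extension above: each 2-pol block gains a
# third, zero-filled polarization slot.
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])      # two 2-pol blocks
print(extend_2pol_npol(x, 3))           # [1. 2. 0. 3. 4. 0.]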
def csc_matvec(mat_csc, vec, dense_output=True, dtype=None):
v_nnz = vec.indices
v_val = vec.data
m_val = mat_csc.data
m_ind = mat_csc.indices
m_ptr = mat_csc.indptr
res_dtype = dtype or np.result_type(mat_csc.dtype, vec.dtype)
if dense_output:
res = np.zeros((mat_csc.shape[0],), dtype=res_dtype)
matvec2dense(m_ptr, m_ind, m_val, v_nnz, v_val, res)
else:
sizes = m_ptr.take(v_nnz+1) - m_ptr.take(v_nnz)
sizes = np.concatenate(([0], np.cumsum(sizes)))
n = sizes[-1]
data = np.empty((n,), dtype=res_dtype)
indices = np.empty((n,), dtype=np.intp)
indptr = np.array([0, n], dtype=np.intp)
matvec2sparse(m_ptr, m_ind, m_val, v_nnz, v_val, sizes, indices, data)
res = sp.sparse.csr_matrix((data, indices, indptr), shape=(1, mat_csc.shape[0]), dtype=res_dtype)
res.sum_duplicates() # expensive operation
return res
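# A hedged consistency check, assuming the compiled kernels matvec2dense and
# matvec2sparse used by csc_matvec are importable from the same module: the
# result should match SciPy's own matrix times sparse-vector product.
import numpy as np
import scipy.sparse as sp

mat = sp.random(6, 5, density=0.4, format='csc')
vec = sp.random(1, 5, density=0.6, format='csr')      # sparse row vector
dense = csc_matvec(mat, vec, dense_output=True)
print(np.allclose(dense, mat.dot(vec.T).toarray().ravel()))  # True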
def _sparse_dot(self, tst_mat, i2i_mat):
# scipy always returns a sparse result, even if the dot product is actually
# dense; this function offers a solution to that problem. It also takes care
# of the sparse result w.r.t. further processing.
if self.dense_output: # calculate dense result directly
# TODO implement matmat multiplication instead of iteration with matvec
res_type = np.result_type(i2i_mat.dtype, tst_mat.dtype)
scores = np.empty((tst_mat.shape[0], i2i_mat.shape[1]), dtype=res_type)
for i in xrange(tst_mat.shape[0]):
v = tst_mat.getrow(i)
scores[i, :] = csc_matvec(i2i_mat, v, dense_output=True, dtype=res_type)
else:
scores = tst_mat.dot(i2i_mat.T)
# NOTE even though not necessary for a symmetric i2i matrix, the
# transpose helps avoid an expensive conversion to CSR (performed by scipy)
if scores.nnz > NNZ_MAX:
# too many nnz lead to undesired memory overhead in downvote_seen_items
scores = scores.toarray(order='C')
return scores
Source: select_percentile_classification.py (project: AutoML-Challenge, author: postech-mlg-exbrain)
def fit(self, X, y):
import scipy.sparse
import sklearn.feature_selection
self.preprocessor = sklearn.feature_selection.SelectPercentile(
score_func=self.score_func,
percentile=self.percentile)
# Because the pipeline guarantees that each feature is positive,
# clip all values below zero to zero
if self.score_func == sklearn.feature_selection.chi2:
if scipy.sparse.issparse(X):
X.data[X.data < 0] = 0.0
else:
X[X<0] = 0.0
self.preprocessor.fit(X, y)
return self
def transform(self, X):
import scipy.sparse
import sklearn.feature_selection
# Because the pipeline guarantees that each feature is positive,
# clip all values below zero to zero
if self.score_func == sklearn.feature_selection.chi2:
if scipy.sparse.issparse(X):
X.data[X.data < 0] = 0.0
else:
X[X < 0] = 0.0
if self.preprocessor is None:
raise NotImplementedError()
Xt = self.preprocessor.transform(X)
if Xt.shape[1] == 0:
raise ValueError(
"%s removed all features." % self.__class__.__name__)
return Xt
def get_hyperparameter_search_space(dataset_properties=None):
percentile = UniformFloatHyperparameter(
name="percentile", lower=1, upper=99, default=50)
score_func = CategoricalHyperparameter(
name="score_func", choices=["chi2", "f_classif"], default="chi2")
if dataset_properties is not None:
# Chi2 can handle sparse data, so we respect this
if 'is_sparse' in dataset_properties and dataset_properties['is_sparse']:
score_func = Constant(
name="score_func", value="chi2")
cs = ConfigurationSpace()
cs.add_hyperparameter(percentile)
cs.add_hyperparameter(score_func)
return cs
def fit(self, X, y):
import scipy.sparse
import sklearn.feature_selection
self.preprocessor = sklearn.feature_selection.GenericUnivariateSelect(
score_func=self.score_func, param=self.alpha, mode=self.mode)
# Because the pipeline guarantees that each feature is positive,
# clip all values below zero to zero
if self.score_func == sklearn.feature_selection.chi2:
if scipy.sparse.issparse(X):
X.data[X.data < 0] = 0.0
else:
X[X < 0] = 0.0
self.preprocessor.fit(X, y)
return self
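# A hedged standalone sketch of what the fit/transform pair above does, using
# scikit-learn directly: clip negative entries (chi2 requires non-negative
# features), then keep the top percentile of features by chi2 score.
import numpy as np
import scipy.sparse
import sklearn.feature_selection

X = scipy.sparse.random(100, 20, density=0.3, format='csr')
X.data[X.data < 0] = 0.0                 # chi2 needs non-negative input
y = np.random.randint(0, 2, size=100)
selector = sklearn.feature_selection.SelectPercentile(
    score_func=sklearn.feature_selection.chi2, percentile=50)
Xt = selector.fit_transform(X, y)
print(Xt.shape)                          # (100, 10): roughly half the features kept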