def csc_matvec(mat_csc, vec, dense_output=True, dtype=None):
    v_nnz = vec.indices
    v_val = vec.data

    m_val = mat_csc.data
    m_ind = mat_csc.indices
    m_ptr = mat_csc.indptr

    res_dtype = dtype or np.result_type(mat_csc.dtype, vec.dtype)
    if dense_output:
        res = np.zeros((mat_csc.shape[0],), dtype=res_dtype)
        matvec2dense(m_ptr, m_ind, m_val, v_nnz, v_val, res)
    else:
        sizes = m_ptr.take(v_nnz+1) - m_ptr.take(v_nnz)
        sizes = np.concatenate(([0], np.cumsum(sizes)))
        n = sizes[-1]
        data = np.empty((n,), dtype=res_dtype)
        indices = np.empty((n,), dtype=np.intp)
        indptr = np.array([0, n], dtype=np.intp)
        matvec2sparse(m_ptr, m_ind, m_val, v_nnz, v_val, sizes, indices, data)
        res = sp.sparse.csr_matrix((data, indices, indptr),
                                   shape=(1, mat_csc.shape[0]), dtype=res_dtype)
        res.sum_duplicates()  # expensive operation
    return res
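`matvec2dense` and `matvec2sparse` are external kernels (compiled helpers in the original project) and are not shown on this page. As rough orientation only, here is a pure-NumPy sketch of what the dense kernel plausibly computes, inferred from the call site above; the body is an assumption, not the project's implementation:

def matvec2dense(m_ptr, m_ind, m_val, v_nnz, v_val, res):
    # For each nonzero vector entry v[j], accumulate the j-th CSC column
    # of the matrix, scaled by that entry, into the dense result.
    # Row indices within a single CSC column are unique, so the
    # fancy-indexed in-place add is safe here.
    for j, v in zip(v_nnz, v_val):
        start, stop = m_ptr[j], m_ptr[j+1]
        res[m_ind[start:stop]] += m_val[start:stop] * v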
Python numpy.result_type() usage examples
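Every snippet on this page leans on NumPy's type-promotion rules. A minimal, standalone illustration of what `np.result_type` returns:

import numpy as np

print(np.result_type(np.int8, np.int16))       # int16
print(np.result_type(np.int32, np.float32))    # float64: float32 cannot hold every int32
print(np.result_type(np.float32, np.float64))  # float64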
def _sparse_dot(self, tst_mat, i2i_mat):
    # scipy always returns a sparse result, even if the dot product is
    # actually dense; this function offers a solution to this problem.
    # It also takes care of the sparse result w.r.t. further processing.
    if self.dense_output:  # calculate dense result directly
        # TODO implement matmat multiplication instead of iteration with matvec
        res_type = np.result_type(i2i_mat.dtype, tst_mat.dtype)
        scores = np.empty((tst_mat.shape[0], i2i_mat.shape[1]), dtype=res_type)
        for i in range(tst_mat.shape[0]):
            v = tst_mat.getrow(i)
            scores[i, :] = csc_matvec(i2i_mat, v, dense_output=True, dtype=res_type)
    else:
        scores = tst_mat.dot(i2i_mat.T)
        # NOTE even though not necessary for a symmetric i2i matrix, the
        # transpose helps to avoid an expensive conversion to CSR (performed by scipy)
        if scores.nnz > NNZ_MAX:
            # too many nnz lead to undesired memory overhead in downvote_seen_items
            scores = scores.toarray(order='C')
    return scores
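The comment about scipy's behavior is easy to verify: a product of two sparse matrices comes back sparse even when nearly every entry is nonzero, so densifying explicitly can be the cheaper representation. A small illustration (NNZ_MAX and downvote_seen_items are project-specific and not shown on this page):

import numpy as np
from scipy import sparse

a = sparse.random(4, 6, density=0.9, format='csr')
b = sparse.random(6, 4, density=0.9, format='csr')

prod = a.dot(b)
print(sparse.issparse(prod))     # True: sparse in, sparse out, even if nearly dense
dense = prod.toarray(order='C')  # explicit densification, as in the branch above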
def __init__(self, x, g, reservoir, transient=0, sideTrack=False,
             verbose=False, **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)

    self.dtype = np.result_type(x.dtype, g.dtype)

    nIn = x.shape[2]
    nOut = g.shape[2]

    Regression.__init__(self, nIn, nOut)

    self.reservoir = reservoir
    self.transient = transient
    self.sideTrack = sideTrack
    self.verbose = verbose

    self.train(x, g, **kwargs)
def __init__(self, x, g,
             elastic=1.0, penalty=0.0,
             weightInitFunc=pinit.lecun,
             optimFunc=optim.scg, **kwargs):
    x = np.asarray(x)
    g = np.asarray(g)
    self.dtype = np.result_type(x.dtype, g.dtype)

    if g.ndim > 1:
        self.flattenOut = False
    else:
        self.flattenOut = True

    self.elastic = elastic
    self.penalty = penalty

    Regression.__init__(self, util.colmat(x).shape[1],
                        util.colmat(g).shape[1])
    optim.Optable.__init__(self)

    self.weights = weightInitFunc((self.nIn+1, self.nOut)).astype(self.dtype, copy=False)

    if optimFunc is not None:
        self.train(x, g, optimFunc, **kwargs)
Source file: align.py
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
Author: SignalMedia
def _align(terms):
    """Align a set of terms"""
    try:
        # flatten the parse tree (a nested list, really)
        terms = list(com.flatten(terms))
    except TypeError:
        # can't iterate, so it must just be a constant or single variable
        if isinstance(terms.value, pd.core.generic.NDFrame):
            typ = type(terms.value)
            return typ, _zip_axes_from_type(typ, terms.value.axes)
        return np.result_type(terms.type), None

    # if all resolved variables are numeric scalars
    if all(term.isscalar for term in terms):
        return _result_type_many(*(term.value for term in terms)).type, None

    # perform the main alignment
    typ, axes = _align_core(terms)
    return typ, axes
def _cartesian_product(*arrays):
    """
    Get the cartesian product of a number of arrays.

    Parameters
    ----------
    arrays : Iterable[np.ndarray]
        The arrays to get a cartesian product of. Always sorted with
        respect to the original array.

    Returns
    -------
    out : np.ndarray
        The overall cartesian product of all the input arrays.
    """
    broadcastable = np.ix_(*arrays)
    broadcasted = np.broadcast_arrays(*broadcastable)
    rows, cols = np.prod(broadcasted[0].shape), len(broadcasted)
    dtype = np.result_type(*arrays)

    out = np.empty(rows * cols, dtype=dtype)
    start, end = 0, rows
    for a in broadcasted:
        out[start:end] = a.reshape(-1)
        start, end = end, end + rows
    return out.reshape(cols, rows)
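For orientation, a quick check of the shape convention this helper uses: each input array occupies one row of the output (this differs from the (N, la) layout of the cartesian_product variant further down the page):

import numpy as np

x = np.array([1, 2])
y = np.array([10, 20, 30])
prod = _cartesian_product(x, y)
print(prod)
# [[ 1  1  1  2  2  2]
#  [10 20 30 10 20 30]]
print(prod.shape)  # (2, 6): one row per input array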
def auto_dtype(A, B):
    """
    Get promoted datatype for A and B combined.

    Parameters
    ----------
    A : ndarray
    B : ndarray

    Returns
    -------
    precision : dtype
        Datatype that would be used after applying NumPy type promotion
        rules. If it is not a float dtype, e.g. an int dtype, the output
        is `float32`.
    """
    # Datatype that would be used after applying NumPy type promotion rules
    precision = np.result_type(A.dtype, B.dtype)

    # Cast to float32 dtype for dtypes that are not float
    if not np.issubdtype(precision, np.floating):
        precision = np.float32
    return precision
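A brief check of the two branches (float dtypes pass through, integer dtypes fall back to float32):

import numpy as np

a = np.zeros(3, dtype=np.int16)
b = np.zeros(3, dtype=np.int32)
print(auto_dtype(a, b))  # float32: promoted type int32 is not a float

c = np.zeros(3, dtype=np.float64)
print(auto_dtype(a, c))  # float64: already a float dtype, passed through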
def test_result_type(self):
    self.check_promotion_cases(np.result_type)
    assert_(np.result_type(None) == np.dtype(None))
def test_weights(self):
    y = np.arange(10)
    w = np.arange(10)
    actual = average(y, weights=w)
    desired = (np.arange(10) ** 2).sum() * 1. / np.arange(10).sum()
    assert_almost_equal(actual, desired)

    y1 = np.array([[1, 2, 3], [4, 5, 6]])
    w0 = [1, 2]
    actual = average(y1, weights=w0, axis=0)
    desired = np.array([3., 4., 5.])
    assert_almost_equal(actual, desired)

    w1 = [0, 0, 1]
    actual = average(y1, weights=w1, axis=1)
    desired = np.array([3., 6.])
    assert_almost_equal(actual, desired)

    # This should raise an error. Can we test for that?
    # assert_equal(average(y1, weights=w1), 9./2.)

    # 2D Case
    w2 = [[0, 0, 1], [0, 0, 2]]
    desired = np.array([3., 6.])
    assert_array_equal(average(y1, weights=w2, axis=1), desired)
    assert_equal(average(y1, weights=w2), 5.)

    y3 = rand(5).astype(np.float32)
    w3 = rand(5).astype(np.float64)
    assert_(np.average(y3, weights=w3).dtype == np.result_type(y3, w3))
def asfptype(self):
    """Upcasts matrix to a floating point format.

    When the matrix has floating point type, the method returns itself.
    Otherwise it makes a copy with floating point type and the same format.

    Returns:
        cupy.sparse.spmatrix: A matrix with float type.

    """
    if self.dtype.kind == 'f':
        return self
    else:
        typ = numpy.result_type(self.dtype, 'f')
        return self.astype(typ)
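The promotion against 'f' (float32) is what keeps small integer types cheap while widening large ones; an illustration of the dtypes that branch produces:

import numpy as np

print(np.result_type(np.int8, 'f'))   # float32: int8 fits in float32
print(np.result_type(np.int64, 'f'))  # float64: int64 needs the wider float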
def __init__(self, classData, average=0.0, shrinkage=0.0):
    """Construct a new Quadratic Discriminant Analysis (QDA) classifier.

    Args:
        classData:  Training data.  This is a numpy array or list of numpy
                    arrays with shape (nCls, nObs[, nIn]).  If the dimensions
                    index is missing the data is assumed to be
                    one-dimensional.

        average:    This parameter regularizes QDA by mixing the class
                    covariance matrices with the average covariance matrix.
                    A value of zero is pure QDA while a value of one
                    reduces to LDA.

        shrinkage:  This parameter regularizes QDA by shrinking each
                    covariance matrix toward the average eigenvalue of
                    the average covariance matrix.

    Returns:
        A trained QDA classifier.
    """
    Classifier.__init__(self, util.colmat(classData[0]).shape[1],
                        len(classData))

    self.dtype = np.result_type(*[cls.dtype for cls in classData])

    # average regularization parameter
    self.average = average

    # shrinkage regularization parameter
    self.shrinkage = shrinkage

    self.train(classData)
def __init__(self, classData, shrinkage=0):
    """Construct a new Linear Discriminant Analysis (LDA) classifier.

    Args:
        classData:  Training data.  This is a numpy array or list of numpy
                    arrays with shape (nCls, nObs[, nIn]).  If the dimensions
                    index is missing the data is assumed to be
                    one-dimensional.

        shrinkage:  This parameter regularizes LDA by shrinking the average
                    covariance matrix toward its average eigenvalue:
                        covariance = (1-shrinkage)*covariance +
                                     shrinkage*averageEigenvalue*identity
                    Behavior is undefined if shrinkage is outside [0,1].
                    This parameter has no effect if average is 0.

    Returns:
        A trained LDA classifier.
    """
    Classifier.__init__(self, util.colmat(classData[0]).shape[1],
                        len(classData))

    self.dtype = np.result_type(*[cls.dtype for cls in classData])

    self.shrinkage = shrinkage

    self.train(classData)
def __init__(self, classData, weightInitFunc=pinit.runif,
             optimFunc=optim.scg, **kwargs):
    """Create a new logistic regression classifier.

    Args:
        classData:      Training data.  This is a numpy array or list of
                        numpy arrays with shape (nCls, nObs[, nIn]).  If the
                        dimensions index is missing the data is assumed to
                        be one-dimensional.

        weightInitFunc: Function to initialize the model weights.
                        The default function is the runif function in the
                        paraminit module.  See the paraminit module for
                        more candidates.

        optimFunc:      Function used to optimize the model weights.
                        See ml.optim for some candidate optimization
                        functions.

        kwargs:         Additional arguments passed to optimFunc.

    Returns:
        A new, trained logistic regression classifier.
    """
    Classifier.__init__(self, util.colmat(classData[0]).shape[1],
                        len(classData))
    optim.Optable.__init__(self)

    self.dtype = np.result_type(*[cls.dtype for cls in classData])

    self.weights = weightInitFunc((self.nIn+1, self.nCls)).astype(self.dtype, copy=False)

    self.train(classData, optimFunc, **kwargs)
def __init__(self, x, g, penalty=0.0, pseudoInv=True):
    Regression.__init__(self, util.colmat(x).shape[1],
                        util.colmat(g).shape[1])

    self.dtype = np.result_type(x.dtype, g.dtype)

    self.penalty = penalty
    self.pseudoInv = pseudoInv

    self.train(x, g)
def indicatorsFromVector(vector, nCls=None, conf=1.0):
    dtype = np.result_type(vector.dtype, np.float32)

    if nCls is None:
        nCls = np.max(vector)+1

    labels = np.arange(nCls, dtype=dtype)

    # one-hot encode: compare each entry of vector against every label
    indicators = np.ones((len(vector), len(labels)), dtype=dtype)
    indicators = ((indicators*vector[:,None]) == (indicators*labels))

    # soften the indicators: the true class gets conf, the others share 1-conf
    offset = (1.0 - conf) / (nCls-1)
    indicators = indicators * (conf-offset) + offset

    return indicators.astype(dtype, copy=False)
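A small run showing the soft one-hot output (with conf=0.8 the two off-class entries share the remaining 0.2):

import numpy as np

v = np.array([0, 2, 1])
print(indicatorsFromVector(v, nCls=3, conf=0.8))
# [[0.8 0.1 0.1]
#  [0.1 0.1 0.8]
#  [0.1 0.8 0.1]]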
def _valid_input(self, value, dtype=None):
    if not misc.is_valid_param_value(value):
        msg = 'The value must be either a tensorflow variable, an array or a scalar.'
        raise ValueError(msg)
    cast = not (dtype is None)
    is_built = False
    shape = None
    if hasattr(self, '_value'):  # The parameter has already been initialized.
        is_built = self.is_built_coherence() == Build.YES
        shape = self.shape
        inner_dtype = self.dtype
        if dtype is not None and inner_dtype != dtype:
            msg = 'Overriding parameter\'s type "{0}" with "{1}" is not possible.'
            raise ValueError(msg.format(inner_dtype, dtype))
        elif isinstance(value, np.ndarray) and inner_dtype != value.dtype:
            msg = 'The value has different data type "{0}". Parameter type is "{1}".'
            raise ValueError(msg.format(value.dtype, inner_dtype))
        cast = False
        dtype = inner_dtype
    if misc.is_number(value):
        value_type = np.result_type(value).type
        num_type = misc.normalize_num_type(value_type)
        dtype = num_type if dtype is None else dtype
        value = np.array(value, dtype=dtype)
    elif misc.is_list(value):
        dtype = settings.float_type if dtype is None else dtype
        value = np.array(value, dtype=dtype)
    elif cast:
        value = value.astype(dtype)
    if shape is not None and self.fixed_shape and is_built and shape != value.shape:
        msg = 'Value has different shape. Parameter shape {0}, value shape {1}.'
        raise ValueError(msg.format(shape, value.shape))
    return value
Source file: common.py
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
Author: SignalMedia
项目源码
文件源码
阅读 51
收藏 0
点赞 0
评论 0
def _result_type_many(*arrays_and_dtypes):
    """ wrapper around numpy.result_type which overcomes the NPY_MAXARGS (32)
    argument limit """
    try:
        return np.result_type(*arrays_and_dtypes)
    except ValueError:
        # we have > NPY_MAXARGS terms in our expression
        return reduce(np.result_type, arrays_and_dtypes)
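To see the fallback in action (on Python 3, `reduce` lives in `functools`; passing more than NPY_MAXARGS arguments to `np.result_type` is what triggers the ValueError the except branch catches):

from functools import reduce
import numpy as np

dtypes = [np.float32] * 40              # more than NPY_MAXARGS (32) arguments
print(reduce(np.result_type, dtypes))   # float32: pairwise reduction sidesteps the limit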
def cartesian_product(*arrays):
    '''
    https://stackoverflow.com/questions/11144513/
    numpy-cartesian-product-of-x-and-y-array-points-into-single-array-of-2d-points
    '''
    la = len(arrays)
    dtype = numpy.result_type(*arrays)
    arr = numpy.empty([len(a) for a in arrays] + [la], dtype=dtype)
    for i, a in enumerate(numpy.ix_(*arrays)):
        arr[..., i] = a
    return arr.reshape(-1, la)
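Unlike `_cartesian_product` earlier on this page, this variant returns one point per row, shape (N, la):

import numpy

x = numpy.array([1, 2])
y = numpy.array([10, 20, 30])
print(cartesian_product(x, y))
# [[ 1 10]
#  [ 1 20]
#  [ 1 30]
#  [ 2 10]
#  [ 2 20]
#  [ 2 30]]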
def __init__(self, x, g, recs=(8,4,2), transient=0, phi=transfer.tanh,
             #iwInitFunc=pinit.lecun, rwInitFunc=pinit.lecun,
             hwInitFunc=pinit.esp, vwInitFunc=pinit.lecun, optimFunc=optim.scg,
             **kwargs):
    x = util.segmat(x)
    g = util.segmat(g)

    self.dtype = np.result_type(x.dtype, g.dtype)

    Regression.__init__(self, x.shape[2], g.shape[2])
    optim.Optable.__init__(self)

    self.transient = transient
    self.phi = phi

    self.nRecHiddens = list(recs)
    self.nRecLayers = len(self.nRecHiddens)

    self.layerDims = [(self.nIn+self.nRecHiddens[0]+1, self.nRecHiddens[0])]
    for l in range(1, self.nRecLayers):
        self.layerDims.append((self.nRecHiddens[l-1]+self.nRecHiddens[l]+1,
                               self.nRecHiddens[l]))
    self.layerDims.append((self.nRecHiddens[-1]+1, self.nOut))

    views = util.packedViews(self.layerDims, dtype=self.dtype)
    self.pw = views[0]
    self.hws = views[1:-1]
    self.vw = views[-1]

    self.iws = []
    self.rws = []
    nIn = self.nIn
    for l in range(self.nRecLayers):
        # split each hidden-layer weight view into input and recurrent parts
        iw = self.hws[l][:(nIn+1)]
        rw = self.hws[l][(nIn+1):]
        self.iws.append(iw)
        self.rws.append(rw)

        #self.iws[l][...] = iwInitFunc(iw.shape).astype(self.dtype, copy=False)
        #self.rws[l][...] = rwInitFunc(rw.shape).astype(self.dtype, copy=False)

        nIn = self.nRecHiddens[l]

        self.hws[l][...] = hwInitFunc(self.hws[l].shape).astype(self.dtype, copy=False)

    self.vw[...] = vwInitFunc(self.vw.shape).astype(self.dtype, copy=False)

    # train the network
    if optimFunc is not None:
        self.train(x, g, optimFunc, **kwargs)