def _reduction__base(self, operatorName, axis):
if axis==None: return self.ravel()._reduction__base(operatorName, 0).item()
if not type(axis) in _numberTypes: raise TypeError('the value %s is not appropriate for the "axis" parameter.' % str(axis))
if axis < -self.ndim or axis>=self.ndim: raise ValueError('axis (%d) out of bounds for an array with %d axes.' % (axis, self.ndim))
axis = int(axis) % self.ndim
if self.size==0:
retShape = _deleteT2(self.shape, axis)
if operatorName=='sum': return zeros(retShape)
elif operatorName=='max': return tile(-inf, retShape)
else: assert False
if operatorName=='max' and axis==0 and cudamatHas('maxAxis0'): # my own fast implementation
ret = empty(self.shape[1:])
_ctInt = _cudamat.ct.c_int
nThreadsPerBlock = 32
gridX, gridY = ((ret.size+nThreadsPerBlock-1)//nThreadsPerBlock), 1
while gridX>65535: gridY*=2; gridX = (gridX+1)//2;
_cudamat._cudamat.maxAxis0.restype = _ctypes.c_int
assert 0==_cudamat._cudamat.maxAxis0(_ctInt(gridX), _ctInt(gridY), _ctInt(nThreadsPerBlock), self._base.p_mat, ret._base.p_mat, _ctInt(self.shape[0]), _ctInt(ret.size))
return ret
if axis==0 and operatorName=='max': # max over rows is not yet supported in cudamat
return self.reshape_2d(1).T.max(1).reshape(self.shape[1:])
if axis==0 and self.ndim==1 and self.size>5000 and operatorName=='sum': # optimization. apparently, cudamat is not maximally efficient.
n = int(numpy.sqrt(self.size-1))
return self[:n*n].reshape((n, n))._reduction__base(operatorName, 0)._reduction__base(operatorName, 0) + self[n*n:]._reduction__base(operatorName, 0)
if operatorName=='sum':
chunkSize = 1024*256 # sum over longer dimensions fails in cudamat
nChunks = (self.shape[axis] + chunkSize-1) // chunkSize
if nChunks>1:
return reduceAdd( self[(slice(None),) * axis + (slice(chunkI*chunkSize, __builtin__.min(self.shape[axis], (chunkI+1)*chunkSize)),)]._reduction__base(operatorName, axis)
for chunkI in range(nChunks))
if operatorName=='max' and self.isnan().any2(): # cudamat bug workaround
return garray(self.asarray().max(axis))
operatorInCm = {'sum': _cmType.sum, 'max': _cmType.max}[operatorName]
if axis==0: return _check_number_types(garray(operatorInCm(self._base_shaped(1), 1, _new_cm(_prodT(self.shape[1:]))), self.shape[1:], None))
if axis==self.ndim-1:
if self.ndim!=2: return self.reshape_2d(-1)._reduction__base(operatorName, 1).reshape(self.shape[:-1])
if self.ndim==2:
chunkSize = 2**16-1
nChunks = (len(self) + chunkSize-1) // chunkSize
if nChunks>1: # cudamat chokes on big arrays, so break it in pieces for cudamat
chunks = tuple([ self[chunkI*chunkSize : __builtin__.min((chunkI+1)*chunkSize, len(self))]
for chunkI in range(nChunks)])
return concatenate([ chunk._reduction__base(operatorName, 1) for chunk in chunks])
else: # small array
return _check_number_types(garray(operatorInCm(self._base_shaped(1), 0, _new_cm((len(self), 1))), (len(self),), None))
return self.transpose_simple(axis)._reduction__base(operatorName, 0).transpose_simple(-axis)
# ------------------------------------------------------------------------------- external misc non-numerical
评论列表
文章目录