gnumpy.py 文件源码-python代码片段

def _reduction__base(self, operatorName, axis):
  if axis==None: return self.ravel()._reduction__base(operatorName, 0).item()
  if not type(axis) in _numberTypes: raise TypeError('the value %s is not appropriate for the "axis" parameter.' % str(axis))
  if axis < -self.ndim or axis>=self.ndim: raise ValueError('axis (%d) out of bounds for an array with %d axes.' % (axis, self.ndim))
  axis = int(axis) % self.ndim
  if self.size==0:
   retShape = _deleteT2(self.shape, axis)
   if operatorName=='sum': return zeros(retShape)
   elif operatorName=='max': return tile(-inf, retShape)
   else: assert False
  if operatorName=='max' and axis==0 and cudamatHas('maxAxis0'): # my own fast implementation
   ret = empty(self.shape[1:])
   _ctInt = _cudamat.ct.c_int
   nThreadsPerBlock = 32
   gridX, gridY = ((ret.size+nThreadsPerBlock-1)//nThreadsPerBlock), 1
   while gridX>65535: gridY*=2; gridX = (gridX+1)//2;
   _cudamat._cudamat.maxAxis0.restype = _ctypes.c_int
   assert 0==_cudamat._cudamat.maxAxis0(_ctInt(gridX), _ctInt(gridY), _ctInt(nThreadsPerBlock), self._base.p_mat, ret._base.p_mat, _ctInt(self.shape[0]), _ctInt(ret.size))
   return ret
  if axis==0 and operatorName=='max': # max over rows is not yet supported in cudamat
   return self.reshape_2d(1).T.max(1).reshape(self.shape[1:])
  if axis==0 and self.ndim==1 and self.size>5000 and operatorName=='sum': # optimization. apparently, cudamat is not maximally efficient.
   n = int(numpy.sqrt(self.size-1))
   return self[:n*n].reshape((n, n))._reduction__base(operatorName, 0)._reduction__base(operatorName, 0) + self[n*n:]._reduction__base(operatorName, 0)
  if operatorName=='sum':
   chunkSize = 1024*256 # sum over longer dimensions fails in cudamat
   nChunks = (self.shape[axis] + chunkSize-1) // chunkSize
   if nChunks>1:
    return reduceAdd( self[(slice(None),) * axis + (slice(chunkI*chunkSize, __builtin__.min(self.shape[axis], (chunkI+1)*chunkSize)),)]._reduction__base(operatorName, axis)
                      for chunkI in range(nChunks))
  if operatorName=='max' and self.isnan().any2(): # cudamat bug workaround
   return garray(self.asarray().max(axis))
  operatorInCm = {'sum': _cmType.sum, 'max': _cmType.max}[operatorName]
  if axis==0: return _check_number_types(garray(operatorInCm(self._base_shaped(1), 1, _new_cm(_prodT(self.shape[1:]))), self.shape[1:], None))
  if axis==self.ndim-1:
   if self.ndim!=2: return self.reshape_2d(-1)._reduction__base(operatorName, 1).reshape(self.shape[:-1])
   if self.ndim==2:
    chunkSize = 2**16-1
    nChunks = (len(self) + chunkSize-1) // chunkSize
    if nChunks>1: # cudamat chokes on big arrays, so break it in pieces for cudamat
     chunks = tuple([ self[chunkI*chunkSize : __builtin__.min((chunkI+1)*chunkSize, len(self))]
                      for chunkI in range(nChunks)])
     return concatenate([ chunk._reduction__base(operatorName, 1) for chunk in chunks])
    else: # small array
     return _check_number_types(garray(operatorInCm(self._base_shaped(1), 0, _new_cm((len(self), 1))), (len(self),), None))
  return self.transpose_simple(axis)._reduction__base(operatorName, 0).transpose_simple(-axis)



 # ------------------------------------------------------------------------------- external misc non-numerical