def compute(self, X=None):
    """Evaluate the networks on ``X`` with the OpenCL ``ann`` kernel.

    The rows of ``X`` and the rows of ``self.weightsOpenCL`` are zero-padded
    up to multiples of ``self.openCL.workGroup[0]`` and
    ``self.openCL.workGroup[1]`` respectively, both are flattened to
    float32, and ``self.openCL.prg.ann`` is launched over a 2-D range of
    (padded data rows, padded weight rows). The padding is stripped from
    the result before it is returned.

    Args:
        X: 2-D array of inputs. When omitted, ``None`` or empty,
            ``self.data`` is used instead.

    Returns:
        float32 array of shape
        ``(X.shape[0], self.weightsOpenCL.shape[0])``. Unless
        ``self.lowerThreshold`` equals the sentinel ``-999``, every value
        below ``self.lowerThreshold`` is raised to that threshold.

    Raises:
        RuntimeError: if ``self.openCL.active`` is false.
    """
    if not self.openCL.active:
        # RuntimeError is a subclass of Exception, so callers that catch
        # ``Exception`` keep working.
        raise RuntimeError('openCL not active')

    # ``None`` replaces the former mutable ``[]`` default; an explicitly
    # passed empty sequence/array still falls back to the stored data.
    if X is None or len(X) == 0:
        X = self.data

    # Unpadded extents, used to crop the kernel output at the end.
    originalLength = X.shape[0]
    originalWidth = self.weightsOpenCL.shape[0]

    groupRows = int(self.openCL.workGroup[0])
    groupNetworks = int(self.openCL.workGroup[1])

    # NOTE(review): the original source carried the following disabled NumPy
    # implementation next to the OpenCL one. Presumably the kernel computes
    # the same quantity -- confirm against the kernel source.
    # ===================================================================
    # networks=self.weights.wHL.shape[0]
    # phiOL=np.empty((X.shape[0], networks))
    # for i0 in range(networks):
    #     aHL=X.dot(self.weights.wHL[i0,:,:])+np.tile(self.weights.bHL[i0,],(X.shape[0],1))
    #     phiHL=self._activate(aHL,0)
    #     aOL=phiHL.dot(self.weights.wOL[:,i0])+self.weights.bOL[i0,]
    #     phiOL[:,i0]=self._activate(aOL,1)
    # ===================================================================

    # Each global dimension must be a whole number of work-groups, so pad
    # the data rows with zeros up to the next multiple.
    remData = originalLength % groupRows
    if remData != 0:
        X = np.vstack((X, np.zeros((groupRows - remData, X.shape[1]))))

    # Same padding for the weight rows (presumably one row per network).
    remNetwork = originalWidth % groupNetworks
    if remNetwork != 0:
        weights = np.vstack((
            self.weightsOpenCL,
            np.zeros((groupNetworks - remNetwork, self.weightsOpenCL.shape[1])),
        ))
    else:
        weights = self.weightsOpenCL

    # Flat, row-major float32 copies are what get uploaded to the device.
    XOpenCL = X.reshape(-1, order='C').astype(np.float32)
    weightsOpenCL = weights.reshape(-1, order='C').astype(np.float32)

    # Scalar kernel arguments are passed as explicit 32-bit integers.
    inputs = np.int32(X.shape[1])
    nodes = np.int32(self.nodes)
    dataSize = np.int32(X.shape[0])
    weightSize = np.int32(self.weightsOpenCL.shape[1])

    globalSize = (int(X.shape[0]), int(weights.shape[0]))
    localSize = (groupRows, groupNetworks)

    # One float32 per (data row, weight row) pair. Integer arithmetic gives
    # the same byte count as the former ``nbytes / inputs * rows`` without
    # dividing by the number of input columns.
    outputCount = globalSize[0] * globalSize[1]
    outputBytes = outputCount * np.dtype(np.float32).itemsize

    mf = cl.mem_flags
    dataBuffer = cl.Buffer(self.openCL.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=XOpenCL)
    weightsBuffer = cl.Buffer(self.openCL.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=weightsOpenCL)
    outBuffer = cl.Buffer(self.openCL.ctx, mf.WRITE_ONLY, outputBytes)

    # Local scratch sized as one weight row per work-item along dimension 1.
    # NOTE(review): the size is taken from self.weightsOpenCL as stored, not
    # from the float32 copy -- confirm this is what the kernel expects.
    localWeights = cl.LocalMemory(self.weightsOpenCL[0,].nbytes * localSize[1])

    kernel = self.openCL.prg.ann
    kernel(self.openCL.queue, globalSize, localSize, inputs, nodes, dataSize,
           weightSize, dataBuffer, outBuffer, weightsBuffer, localWeights)

    # Allocate the host array directly as float32 instead of creating a
    # float64 array and converting it.
    phiOL = np.empty((outputCount,), dtype=np.float32)
    cl.enqueue_copy(self.openCL.queue, phiOL, outBuffer)

    # The flat result is interpreted in column-major order, then the
    # zero-padding rows and columns are cropped away.
    phiOL = np.reshape(phiOL, globalSize, order='F')[:originalLength, :originalWidth]

    # -999 acts as the "no lower threshold" sentinel.
    if self.lowerThreshold != -999:
        phiOL[phiOL < self.lowerThreshold] = self.lowerThreshold
    return phiOL
# Comment list
# Table of contents