# assumed module-level imports: numpy, scipy.spatial.distance, theano, and theano.tensor as T
def mineHardNegativeTrainingPairsWithinMiniBatches(self):
    """
    For every training sample, find the sample in its minibatch with which it
    forms the most expensive (highest cost) pair, and return the pair indices
    together with same-class labels.
    """
    dnParams = self.descrNet.cfgParams
    batch_size = self.cfgParams.batch_size
    pairIdx = self.tvPairIdx
    #pairLabels = self.tvPairLabels
    y = self.tvY
    margin = self.pair_neg_margin

    diff = self.descrNet.output[pairIdx[:, 0]] - self.descrNet.output[pairIdx[:, 1]]
    dst = T.sum(diff**2, axis=1) / dnParams.outputDim[1]  # divide by the number of output dimensions, so the maximum distance is 1
    pairLabels = T.eq(y[pairIdx[:, 0]], y[pairIdx[:, 1]])  # 1 if both samples have the same class, 0 otherwise
    pair_cost = pairLabels*dst + (1-pairLabels)*T.sqr(T.maximum(0, margin - T.sqrt(dst)))
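    # pair_cost above is a contrastive loss: positive pairs (same class) are
    # penalized by their normalized squared distance, while negative pairs
    # contribute max(0, margin - sqrt(dst))^2, i.e. only while they are still
    # closer than the margin.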
    # indices for all pairs of vectors in the minibatch
    pidx1, pidx2 = numpy.triu_indices(batch_size, 1)  # numpy.mask_indices(batch_size, numpy.triu, 1)
    pidx1 = pidx1.reshape((len(pidx1), 1))
    pidx2 = pidx2.reshape((len(pidx2), 1))
    comb_pairIdx = numpy.concatenate((pidx1, pidx2), axis=1).astype(numpy.int32)
    dm = self.dataManager
    if isinstance(self.tvX, list):
        givens = {tv: data[self.tvIndex * batch_size:(self.tvIndex + 1) * batch_size] for (tv, data) in zip(self.tvX, dm.tvsData_x)}
    else:
        givens = {self.tvX: dm.tvsData_x[self.tvIndex * batch_size:(self.tvIndex + 1) * batch_size]}
    givens[y] = dm.tvsData_y[self.tvIndex * batch_size:(self.tvIndex + 1) * batch_size]  # y is self.tvY (see above)
    givens[pairIdx] = comb_pairIdx
    tf = theano.function(inputs=[self.tvIndex],
                         outputs=[pair_cost],
                         givens=givens)
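    # tf maps a minibatch index to the vector of costs for all
    # batch_size*(batch_size-1)/2 sample pairs within that minibatch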
    # for every sample, get the index of the other sample with which it forms
    # the most expensive (highest cost) pair
    nSamp = self.n_train_batches*batch_size
    idx = numpy.zeros(nSamp, dtype=numpy.int32)
    labels = numpy.zeros(nSamp, dtype=numpy.int32)
    for i in range(self.n_train_batches):
        # if self.macroBatchSize > 0:
        #     self.setMacroBatchData(self.traindataDB, numpy.floor(i / self.macroBatchSize).astype(numpy.int))
        #     miniBatchIdx = numpy.mod(i, self.macroBatchSize)
        # else:
        #     miniBatchIdx = i
        miniBatchIdx = self.dataManager.makeMinibatchAvailable(self.traindataDB, i)
        c = tf(miniBatchIdx)
        # expand the condensed pair-cost vector to a symmetric batch_size x batch_size matrix
        c = scipy.spatial.distance.squareform(c[0])
        # for each sample, find the partner with the maximum pair cost (as a global sample index)
        offset = i*batch_size
        maxIdx = numpy.argmax(c, axis=0) + offset
        idx[i*batch_size:(i+1)*batch_size] = maxIdx
        labels[i*batch_size:(i+1)*batch_size] = self.traindataDB.y[maxIdx] == self.traindataDB.y[i*batch_size:(i+1)*batch_size]
        #print(c)

    # return pairs (sample index, index of its hardest partner) and whether the two share the same class
    idx = numpy.concatenate((numpy.arange(nSamp, dtype=numpy.int32).reshape(nSamp, 1), idx.reshape(nSamp, 1)), axis=1)
    return idx, labels
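
# A minimal, self-contained sketch (illustration only, not part of the class above)
# of the condensed-to-square expansion the mining loop relies on:
# numpy.triu_indices(n, 1) enumerates the upper-triangle pairs in exactly the
# row-major order that scipy.spatial.distance.squareform expects, so the
# per-pair costs can be expanded to a symmetric n x n matrix whose column-wise
# argmax picks each sample's most expensive partner. The names toy_costs and
# hardest are made up for this example.
import numpy
import scipy.spatial.distance

batch_size = 4
pidx1, pidx2 = numpy.triu_indices(batch_size, 1)           # 6 pairs for a batch of 4
toy_costs = numpy.random.RandomState(0).rand(len(pidx1))   # stand-in for tf(...)[0]

c = scipy.spatial.distance.squareform(toy_costs)            # symmetric 4 x 4 cost matrix
assert numpy.allclose(c[pidx1, pidx2], toy_costs)           # entry k lands at (pidx1[k], pidx2[k])
assert numpy.allclose(c, c.T)

hardest = numpy.argmax(c, axis=0)                           # hardest partner per sample (diagonal is 0)
print(hardest)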