gmm_ridge.py 文件源码-python代码片段

def split_data_class(self,y,v=5):
        ''' The function split the data into v folds. The samples of each class are split approximatly in v folds
        Input:
            n : the number of samples
            v : the number of folds
        Output: None
        '''
        # Get parameters
        n = y.size
        C = y.max().astype('int')

        # Get the step for each class
        tc = []
        for j in range(v):
            tempit = []
            tempiT = []
            for i in range(C):
                # Get all samples for each class
                t  = sp.where(y==(i+1))[0]
                nc = t.size
                stepc = nc // v # Step size for each class
                if stepc == 0:
                    print "Not enough sample to build "+ str(v) +" folds in class " + str(i)                                    
                sp.random.seed(i)   # Set the random generator to the same initial state
                tc = t[sp.random.permutation(nc)] # Random sampling of indices of samples for class i

                # Set testing and training samples
                if j < (v-1):
                    start,end = j*stepc,(j+1)*stepc
                else:
                    start,end = j*stepc,nc
                tempiT.extend(sp.asarray(tc[start:end])) #Testing
                k = range(v)
                k.remove(j)
                for l in k:
                    if l < (v-1):
                        start,end = l*stepc,(l+1)*stepc
                    else:
                        start,end = l*stepc,nc
                    tempit.extend(sp.asarray(tc[start:end])) #Training

            self.it.append(tempit)
            self.iT.append(tempiT)