def split_data_class(self,y,v=5):
    ''' Split the samples into v folds, class by class (stratified split).

    The indices of each class are randomly permuted, with the random
    generator reseeded per class so that the partition is reproducible, and
    cut into v consecutive chunks of nc // v samples. The last chunk also
    takes the remainder when the class size nc is not a multiple of v.
    For fold j, chunk j of every class goes to the testing set and all the
    other chunks go to the training set.

    Input:
        y : the array of class labels; classes are looked up as 1, 2, ..., y.max()
        v : the number of folds (nothing is built when v < 1)
    Output: None. For each fold, one list of training indices is appended to
        self.it and one list of testing indices is appended to self.iT.
    '''
    # Get parameters
    C = y.max().astype('int')

    # Without any fold there is nothing to append (and nc // v would divide by zero)
    if v < 1:
        return

    # The permutation of a class does not depend on the fold: draw it once per class
    shuffled = []
    for i in range(C):
        # Get all samples for each class
        t = sp.where(y == (i + 1))[0]
        nc = t.size
        stepc = nc // v  # Step size for each class
        if stepc == 0:
            # Report the actual label (i+1), not the 0-based loop index
            print("Not enough sample to build " + str(v) + " folds in class " + str(i + 1))
        # NOTE: this reseeds the global random generator, so code that runs after
        # this method sees a generator seeded with the index of the last class.
        sp.random.seed(i)  # Set the random generator to the same initial state
        shuffled.append((t[sp.random.permutation(nc)], stepc))  # Random sampling of indices of samples for class i

    # Set testing and training samples
    for j in range(v):
        tempit = []
        tempiT = []
        for tc, stepc in shuffled:
            start = j * stepc
            # The last fold absorbs the samples left over by the integer division
            end = (j + 1) * stepc if j < (v - 1) else tc.size
            tempiT.extend(tc[start:end])  # Testing
            # Training: everything before and after the testing chunk, in order
            tempit.extend(tc[:start])
            tempit.extend(tc[end:])
        self.it.append(tempit)
        self.iT.append(tempiT)
评论列表
文章目录