def fit(self, X, Y):
    """Fit one predictor per target column of ``Y``.

    ``Y`` may be a 1-D vector (single target) or a 2-D matrix with one
    column per target.  ``self.predictors`` is (re)built from its first
    element so that it holds exactly one predictor per target, then each
    predictor is fitted on ``X`` and its own column of ``Y``.

    When ``self.balance`` is true, each target is trained on a subsample:
    every row of the minority side (``y > 0`` vs ``y <= 0``) plus an equally
    sized random draw (via ``np.random``) from the majority side.  If one
    side is empty there is nothing to balance, so all rows are used.

    Sets ``self.n_target`` (number of target columns) and ``self.n_label``
    (number of distinct values found in ``Y``).

    Returns ``self`` so that calls can be chained.
    """
    if len(Y.shape) == 1:
        # Turn the vector into a single-column matrix so Y.shape[1] exists.
        Y = np.array([Y]).transpose()
    self.n_target = Y.shape[1]  # one target per column of Y
    # Distinct values in Y: the classes of a single categorical target, or
    # the two states when the targets are binary indicator columns.
    self.n_label = len(np.unique(Y))

    # Create the right number of copies of the predictor instance.
    if len(self.predictors) != self.n_target:
        template = self.predictors[0]
        self.predictors = [template]
        # NOTE: shallow copies -- nested mutable state of the template is
        # shared between the copies until each one is fitted.
        self.predictors.extend(
            copy.copy(template) for _ in range(1, self.n_target)
        )

    # Fit all predictors.
    for i in range(self.n_target):
        predictor = self.predictors[i]
        target = Y[:, i]

        # Propagate the desired number of estimators, if supported.
        if hasattr(predictor, 'n_estimators'):
            predictor.n_estimators = self.n_estimators

        if not self.balance:
            predictor.fit(X, target)
            continue

        # Subsample: keep the whole minority side, draw as many from the other.
        pos = target > 0
        neg = target <= 0
        n_pos = np.count_nonzero(pos)
        n_neg = np.count_nonzero(neg)
        if n_pos < n_neg:
            minority, majority, n_minority = pos, neg, n_pos
        else:
            minority, majority, n_minority = neg, pos, n_neg

        if n_minority == 0:
            # Only one side is present: balancing would leave an empty
            # training set, so train on everything instead.
            predictor.fit(X, target)
            continue

        candidates = np.flatnonzero(majority)
        np.random.shuffle(candidates)
        mask = minority.copy()
        mask[candidates[:n_minority]] = True
        # Train with the chosen samples only.
        predictor.fit(X[mask, :], target[mask])
    return self
# (Web-page navigation residue, not part of the code: "Comment list" / "Table of contents".)