def train(self):
    """Fit an ExtraTreesRegressor on the outlier-trimmed training data.

    Steps performed, in order:

    1. Keep only the rows of ``self.TrainData`` whose ``logerror`` lies
       strictly between ``self._low`` and ``self._up``.
    2. Fit the regressor on every column except ``self._l_drop_cols``,
       using ``logerror`` as the target.
    3. Pickle the fitted model to a timestamped file under
       ``self.OutputDir``.
    4. Append ``self.ValidData`` (restricted to the training frame's
       columns) to ``self.TrainData``.

    Side effects:
        Overwrites ``self.TrainData`` (filtered, then extended with the
        validation rows), and sets ``self._l_train_columns``,
        ``self._model`` and ``self._f_eval_train_model``.

    Returns:
        None
    """
    print('size before truncated outliers is %d ' % len(self.TrainData))
    logerror = self.TrainData['logerror']
    # Strict inequalities: rows sitting exactly on a bound are discarded too.
    self.TrainData = self.TrainData[(logerror > self._low) & (logerror < self._up)]
    print('size after truncated outliers is %d ' % len(self.TrainData))

    X = self.TrainData.drop(self._l_drop_cols, axis=1)
    Y = self.TrainData['logerror']
    self._l_train_columns = X.columns

    # NOTE(review): criterion='mse' was renamed to 'squared_error' in
    # scikit-learn 1.0 and the old spelling was removed in 1.2 -- confirm the
    # pinned scikit-learn version before upgrading.
    etr = ExtraTreesRegressor(
        n_estimators=self._iter,
        criterion='mse',
        # Each split considers floor(sqrt(number of feature columns)) features.
        max_features=int(math.sqrt(len(self._l_train_columns))),
        max_depth=self._depth,
        n_jobs=2,
        random_state=2017,
        verbose=True
    )
    self._model = etr.fit(X, Y)

    # Persist the fitted model; the path is kept on the instance for later use.
    # NOTE(review): the timestamp contains ':' characters, which are not
    # valid in file names on Windows -- confirm the target platform.
    self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(
        self.OutputDir,
        self.__class__.__name__,
        datetime.now().strftime('%Y%m%d-%H:%M:%S'))
    with open(self._f_eval_train_model, 'wb') as o_file:
        # The context manager closes the file; no explicit close() is needed.
        pickle.dump(self._model, o_file, pickle.HIGHEST_PROTOCOL)

    # Fold the validation rows into the training frame. ignore_index=True
    # renumbers the rows so the two frames' indices cannot overlap.
    self.TrainData = pd.concat(
        [self.TrainData, self.ValidData[self.TrainData.columns]],
        ignore_index=True)
评论列表
文章目录