def train(self):
    """Fit a LassoLars model on the outlier-truncated training data.

    Steps:
      1. Keep only rows of ``self.TrainData`` whose ``logerror`` lies
         strictly between ``self._low`` and ``self._up``.
      2. Shift ``longitude`` and ``latitude`` by fixed offsets.
      3. Drop the columns listed in ``self._l_drop_cols`` and fit
         ``LassoLars(alpha=self._lr_alpha, max_iter=self._lr_iter)``
         against ``logerror``.

    The filtering and shifting are done on a copy, so ``self.TrainData``
    itself is not modified.

    Attributes set:
        self._l_train_columns: feature columns used for fitting.
        self._model: the fitted estimator.
        self._f_eval_train_model: timestamped ``.pkl`` path under
            ``self.OutputDir`` (the model is not written here; the dump
            is currently disabled).

    Returns:
        None
    """
    start = time.time()

    print('size before truncated outliers is %d ' % len(self.TrainData))
    keep = (self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)
    # Explicit copy: the in-place shifts below must not write through to
    # self.TrainData, and this avoids pandas' SettingWithCopyWarning.
    train_df = self.TrainData[keep].copy()
    # Report the size of the *filtered* frame (the original printed the
    # unfiltered size again).
    print('size after truncated outliers is %d ' % len(train_df))

    # Subtracting a negative offset adds 118600000 to longitude.
    # NOTE(review): presumably these constants centre the coordinates on the
    # data's region -- confirm that prediction applies the same shift.
    train_df['longitude'] -= -118600000
    train_df['latitude'] -= 34220000

    # Disabled: merge of extra evaluation features.
    # extra_tr = pd.read_hdf(path_or_buf='%s/p21/eval_train.hdf' % self.InputDir, key='train')
    # self.TrainData = pd.concat([self.TrainData, extra_tr.drop('parcelid', axis= 1)], axis = 1)

    # Build features/target from the filtered, shifted frame so the
    # preprocessing above actually reaches the model.
    X = train_df.drop(self._l_drop_cols, axis=1)
    Y = train_df['logerror']
    self._l_train_columns = X.columns
    X = X.values.astype(np.float32, copy=False)

    lr = LassoLars(alpha=self._lr_alpha, max_iter=self._lr_iter, verbose=True)
    self._model = lr.fit(X, Y)
    end = time.time()

    print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

    self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                        datetime.now().strftime('%Y%m%d-%H:%M:%S'))
    # Disabled: persisting the fitted model.
    # with open(self._f_eval_train_model, 'wb') as o_file:
    #     pickle.dump(self._model, o_file, -1)

    # Disabled: appending validation rows to the training data.
    # self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
    #                            ignore_index=True)  # ignore_index resets the index; otherwise indices overlap

    return
评论列表
文章目录