def submit(self):
    """Refit on the filtered training data, predict the test set, write a CSV.

    Steps, in order:

    1. Keep the rows of ``self.TrainData`` whose ``logerror`` lies strictly
       between ``self._low`` and ``self._up``, shift their coordinates, fit
       an ``ElasticNet`` on every column except ``self._l_drop_cols`` and
       store the fitted estimator in ``self._model``.
    2. Delete ``self.TrainData``, load the test frame with
       ``self._data.LoadFromHdfFile(self.InputDir, 'test')`` and predict one
       output column per entry of ``self._l_test_predict_columns``, working
       through the rows in fixed-size chunks.
    3. Delete ``self.TestData`` and write ``self._sub`` to
       ``<OutputDir>/<ClassName>_<timestamp>.csv`` without the index and with
       floats formatted as ``%.4f``.

    Side effects: sets ``self._model`` and ``self._sub``, removes the
    ``TrainData`` and ``TestData`` attributes, prints progress to stdout and
    creates ``self.OutputDir`` when it is missing.

    Raises:
        OSError: if ``self.OutputDir`` cannot be created.
    """
    # Offsets subtracted from the raw coordinates. Train and test have to be
    # shifted identically, so the values are defined exactly once.
    # NOTE(review): presumably these centre the coordinates on the region the
    # data covers -- confirm against the feature-engineering code.
    longitude_offset = -118600000
    latitude_offset = 34220000
    # Training columns whose test-time name carries the prediction-column
    # suffix (e.g. 'monthyear' + d).
    suffixed_columns = ('lastgap', 'monthyear', 'buildingage')
    # Number of test rows handed to predict() at a time.
    chunk_size = 200000

    ## retrain with the whole training data
    self.TrainData = self.TrainData[
        (self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)
    ]
    self.TrainData['longitude'] -= longitude_offset
    self.TrainData['latitude'] -= latitude_offset

    features = self.TrainData.drop(self._l_drop_cols, axis=1)
    target = self.TrainData['logerror']
    features = features.values.astype(np.float32, copy=False)

    en = ElasticNet(alpha=self._alpha, l1_ratio=self._ratio, max_iter=self._iter,
                    tol=1e-4, selection=self._sel, random_state=2017)
    self._model = en.fit(features, target)

    # The training frame is no longer needed; release it before the test
    # frame is loaded.
    del self.TrainData, features, target
    gc.collect()

    self.TestData = self._data.LoadFromHdfFile(self.InputDir, 'test')
    # For a quick trial run the test frame can be subsampled here, e.g.
    # self.TestData = self.TestData.sample(frac=0.01)

    self._sub = pd.DataFrame(index=self.TestData.index)
    self._sub['ParcelId'] = self.TestData['parcelid']

    self.TestData['longitude'] -= longitude_offset
    self.TestData['latitude'] -= latitude_offset

    start = time.time()
    for d in self._l_test_predict_columns:
        s0 = time.time()
        print('Prediction for column %s ' % d)

        l_test_columns = ['%s%s' % (c, d) if c in suffixed_columns else c
                          for c in self._l_train_columns]
        x_test = self.TestData[l_test_columns]

        for idx in range(0, len(x_test), chunk_size):
            # Slice once, by position, so the chunk boundaries always match
            # the positional offsets produced by range().
            block = x_test.iloc[idx:idx + chunk_size]
            ret = self._model.predict(block.values.astype(np.float32, copy=False))
            self._sub.loc[block.index, d] = ret
            print(np.mean(np.abs(ret)))

        e0 = time.time()
        print('Prediction for column %s is done. time elapsed %ds' % (d, (e0 - s0)))

    ## clean
    del self.TestData
    gc.collect()

    end = time.time()
    print('Prediction is done. time elapsed %ds' % (end - start))

    # Create the directory and tolerate it already existing, instead of
    # checking first and creating afterwards (which can race).
    try:
        os.makedirs(self.OutputDir)
    except OSError:
        if not os.path.isdir(self.OutputDir):
            raise

    # NOTE(review): the ':' characters in the timestamp are not valid in
    # Windows file names; the pattern is kept unchanged so existing output
    # naming is preserved.
    self._sub.to_csv(
        '{0}/{1}_{2}.csv'.format(self.OutputDir, self.__class__.__name__, datetime.now().strftime('%Y%m%d-%H:%M:%S')),
        index=False, float_format='%.4f')
评论列表
文章目录