ElasticNet.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:kaggle 作者: RankingAI 项目源码 文件源码
def submit(self):
    """Retrain on the filtered training data, predict the test set, write a CSV.

    Steps, in order:

    1. Keep only training rows whose ``logerror`` lies strictly between
       ``self._low`` and ``self._up``, shift the ``longitude``/``latitude``
       columns by fixed offsets, and fit an ``ElasticNet`` on every column
       except those in ``self._l_drop_cols``. The fitted estimator is stored
       in ``self._model``; ``self.TrainData`` is deleted afterwards.
    2. Load the test set through ``self._data.LoadFromHdfFile`` and apply the
       same coordinate shift.
    3. For each entry ``d`` of ``self._l_test_predict_columns``, predict in
       chunks of 200000 rows and store the result in column ``d`` of
       ``self._sub``. ``self.TestData`` is deleted afterwards.
    4. Write ``self._sub`` to
       ``<OutputDir>/<ClassName>_<YYYYmmdd-HH:MM:SS>.csv`` without the index,
       formatting floats with four decimals.

    Returns:
        None. Results are exposed through ``self._model``, ``self._sub`` and
        the CSV file written to ``self.OutputDir``.
    """
    # The same shift must be applied to train and test, so the offsets are
    # named once instead of being repeated as literals.
    # NOTE(review): presumably these center the scaled coordinates around the
    # region of interest -- confirm against the feature-engineering step.
    longitude_offset = -118600000
    latitude_offset = 34220000
    # Feature names that get the prediction-column suffix appended at test time.
    suffixed_features = ('lastgap', 'monthyear', 'buildingage')
    chunk_size = 200000

    ## retrain with the whole training data
    in_range = (self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)
    # Explicit copy: the columns are modified in place below, and doing that
    # on a filtered selection is the chained-assignment pattern pandas warns
    # about (SettingWithCopyWarning).
    self.TrainData = self.TrainData[in_range].copy()

    self.TrainData['longitude'] -= longitude_offset
    self.TrainData['latitude'] -= latitude_offset

    X = self.TrainData.drop(self._l_drop_cols, axis=1)
    Y = self.TrainData['logerror']
    X = X.values.astype(np.float32, copy=False)

    en = ElasticNet(alpha=self._alpha, l1_ratio=self._ratio, max_iter=self._iter,
                    tol=1e-4, selection=self._sel, random_state=2017)
    self._model = en.fit(X, Y)

    # Release the training data before the test set is loaded.
    del self.TrainData, X, Y
    gc.collect()

    self.TestData = self._data.LoadFromHdfFile(self.InputDir, 'test')

    # The submission frame shares the test index so that chunk predictions
    # can be written back by index label.
    self._sub = pd.DataFrame(index=self.TestData.index)
    self._sub['ParcelId'] = self.TestData['parcelid']

    self.TestData['longitude'] -= longitude_offset
    self.TestData['latitude'] -= latitude_offset

    start = time.time()
    for d in self._l_test_predict_columns:
        s0 = time.time()

        print('Prediction for column %s ' % d)
        # Training feature names are reused, except that the suffixed
        # features are looked up under '<name><d>' in the test frame.
        l_test_columns = ['%s%s' % (c, d) if c in suffixed_features else c
                          for c in self._l_train_columns]
        x_test = self.TestData[l_test_columns]

        # Predict chunk by chunk so only one float32 block is materialised
        # at a time.
        for idx in range(0, len(x_test), chunk_size):
            chunk = x_test[idx:idx + chunk_size]
            x_test_block = chunk.values.astype(np.float32, copy=False)
            ret = self._model.predict(x_test_block)
            self._sub.loc[chunk.index, d] = ret
            # Progress/sanity output: mean absolute prediction of this chunk.
            print(np.mean(np.abs(ret)))

        e0 = time.time()
        print('Prediction for column %s is done. time elapsed %ds' % (d, (e0 - s0)))

    ## clean
    del self.TestData
    gc.collect()

    end = time.time()
    print('Prediction is done. time elapsed %ds' % (end - start))

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(self.OutputDir, exist_ok=True)

    # NOTE(review): the timestamp contains ':' characters, which are not
    # valid in Windows filenames. The format is kept unchanged here because
    # downstream tooling may rely on it.
    self._sub.to_csv(
        '{0}/{1}_{2}.csv'.format(self.OutputDir, self.__class__.__name__,
                                 datetime.now().strftime('%Y%m%d-%H:%M:%S')),
        index=False, float_format='%.4f')
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号