def evaluate(self):
    """Print the local mean absolute error of the model on the validation set.

    For every entry ``d`` of ``self._l_valid_predict_columns`` the method

    1. loads ``<InputDir>/p21/eval_valid_<d>.hdf`` (key ``'valid'``) and
       appends its columns, minus ``parcelid``, to ``self.ValidData``;
    2. predicts with ``self._model`` on the columns listed in
       ``self._l_train_columns``, where ``lastgap``, ``monthyear`` and
       ``buildingage`` are replaced by their ``d``-suffixed variants;
    3. records ``logerror`` as ground truth only for the rows whose
       ``transactiondate`` month equals ``int(d[-2:])``.

    The absolute errors of all cells that have a ground truth are summed
    and divided by the number of validation rows (not by the number of
    scored cells). Outliers are not truncated.

    Side effects: prints the score and the elapsed time, and deletes
    ``self.ValidData`` afterwards to release memory, so the method cannot
    be called twice on the same loaded validation set.

    Returns:
        None.
    """
    pred_valid = pd.DataFrame(index=self.ValidData.index)
    pred_valid['parcelid'] = self.ValidData['parcelid']
    truth_valid = pd.DataFrame(index=self.ValidData.index)
    truth_valid['parcelid'] = self.ValidData['parcelid']

    # Features that exist once per prediction column and carry it as a suffix.
    suffixed_features = ('lastgap', 'monthyear', 'buildingage')

    start = time.time()
    for d in self._l_valid_predict_columns:
        feature_columns = ['%s%s' % (c, d) if c in suffixed_features else c
                           for c in self._l_train_columns]
        extra_va = pd.read_hdf(path_or_buf='%s/p21/eval_valid_%s.hdf' % (self.InputDir, d), key='valid')
        # Column-wise concat aligns rows on the index.
        # NOTE(review): assumes extra_va is indexed like self.ValidData -- confirm.
        merged = pd.concat([self.ValidData, extra_va.drop('parcelid', axis=1)], axis=1)
        x_valid = merged[feature_columns].values.astype(np.float32, copy=False)
        pred_valid[d] = self._model.predict(x_valid)
        # Only rows transacted in the month encoded by the last two
        # characters of d have a ground truth for this column; every other
        # cell stays NaN and is skipped by the NaN-aware sum below.
        in_month = merged[merged['transactiondate'].dt.month == int(d[-2:])]
        truth_valid.loc[in_month.index, d] = in_month['logerror']
        # Drop this iteration's temporaries (merged is a full copy of the
        # validation frame) before the next file is loaded.
        del extra_va, merged, x_valid, in_month

    # parcelid is an identifier, not a prediction: keep it out of the error
    # frame so that a non-numeric id cannot break the subtraction.
    abs_err = (pred_valid.drop('parcelid', axis=1)
               - truth_valid.drop('parcelid', axis=1)).abs()
    total = 0.0
    for col in abs_err.columns:
        total += abs_err[col].sum()  # Series.sum skips NaN cells
    n_rows = len(pred_valid)
    # Divided by the number of instances, not the number of 'cells'.
    score = total / n_rows if n_rows else float('nan')

    print('============================= ')
    print('Local MAE is %.6f' % score)
    print('=============================')
    end = time.time()

    del self.ValidData
    del pred_valid, truth_valid, abs_err
    gc.collect()
    print('time elapsed %ds' % (end - start))
评论列表
文章目录