def __init__(self, path="../input/train.h5"):
    """Load the training table and split it into train/test by timestamp.

    Reads the ``"train"`` object from the HDF5 store at *path*, picks the
    unique timestamp at the middle index as the split value, and keeps
    rows with a timestamp below that value in ``self.train`` and all
    remaining rows in ``self.test``.

    Args:
        path: Location of the HDF5 store to read. Defaults to
            ``"../input/train.h5"``.
    """
    # Column names kept as attributes so other code can refer to them
    # instead of repeating string literals.
    self.ID_COL_NAME = 'id'
    self.SAMPLE_COL_NAME = 'sample'
    self.TARGET_COL_NAME = 'y'
    self.TIME_COL_NAME = 'timestamp'

    # Hold the store open only for the read itself; everything below
    # works on the object returned by get().
    with pd.HDFStore(path, "r") as hfdata:
        fullset = hfdata.get("train")

    self.timestamp = 0
    timestamps = fullset[self.TIME_COL_NAME]
    self.unique_timestamp = timestamps.unique()

    # Use the first half of the unique timestamps for training and the
    # second half for the test set.
    # NOTE(review): unique() keeps order of first appearance, while the
    # split below compares timestamp values; this is an even split only
    # if the stored table is ordered by timestamp -- confirm.
    n = len(self.unique_timestamp)
    i = n // 2  # integer division; avoids the float round-trip of int(n/2)
    timesplit = self.unique_timestamp[i]
    self.n = n
    self.unique_idx = i

    self.train = fullset[timestamps < timesplit]
    self.test = fullset[timestamps >= timesplit]

    # Just in case the full labels are needed later.
    self.y_test_full = self.test[self.TARGET_COL_NAME]
    self.y_pred_full = []
    self.temp_test_y = None
评论列表
文章目录