def __MergeData(cls, InputDir, OutputDir, mode):
    """Join the action records with the property table and pickle the result.

    The action file (``train_2016_v2.csv`` when ``mode == 'train'``,
    ``sample_submission.csv`` for any other mode) is left-joined with
    ``properties_2016.csv`` on ``parcelid``, so every action row is kept
    even when no matching property row exists. The merged frame is written
    to ``train.pkl`` or ``test.pkl`` inside ``OutputDir``.

    Args:
        InputDir: Directory that contains the input CSV files.
        OutputDir: Directory that receives the pickle file; it is created
            (including missing parents) when it does not exist yet.
        mode: ``'train'`` selects the training files; every other value
            selects the submission/test files.

    Returns:
        None. The result is only written to disk.

    NOTE(review): both modes parse a ``transactiondate`` column and merge
    on ``parcelid`` -- confirm that ``sample_submission.csv`` really
    carries both columns, otherwise the non-train path fails while loading.
    """
    if mode == 'train':
        ActionDataFile = '%s/train_2016_v2.csv' % InputDir
        OutputFile = '%s/train.pkl' % OutputDir
    else:
        ActionDataFile = '%s/sample_submission.csv' % InputDir
        OutputFile = '%s/test.pkl' % OutputDir
    print(OutputFile)
    PropertyDataFile = '%s/properties_2016.csv' % InputDir

    ## load
    ActionData = pd.read_csv(ActionDataFile, parse_dates=['transactiondate'])
    PropertyData = pd.read_csv(PropertyDataFile)

    ## left join
    MergedData = ActionData.merge(PropertyData, how='left', on='parcelid')

    ## output into pkl file
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OutputDir, exist_ok=True)
    # The with-statement closes the file; no explicit close() is needed.
    with open(OutputFile, 'wb') as o_file:
        pickle.dump(MergedData, o_file, pickle.HIGHEST_PROTOCOL)
## split rawcensustractandblock into census, tract and block
评论列表
文章目录