Preprocess.py 文件源码

python
阅读 23 收藏 0 点赞 0 评论 0

项目:kaggle 作者: RankingAI 项目源码 文件源码
def __MergeData(cls, InputDir, OutputDir, mode):
        """Join the action records with the property table and pickle the result.

        The action file is chosen by ``mode``:

        * ``'train'``  -> ``<InputDir>/train_2016_v2.csv``, written to
          ``<OutputDir>/train.pkl``
        * anything else -> ``<InputDir>/sample_submission.csv``, written to
          ``<OutputDir>/test.pkl``

        The property table is always ``<InputDir>/properties_2016.csv``.

        Args:
            InputDir: Directory containing the input CSV files.
            OutputDir: Directory that receives the pickle; created if missing.
            mode: ``'train'`` selects the training files; any other value
                selects the submission/test files.

        Returns:
            None. The merged DataFrame is only written to disk.
        """
        if mode == 'train':
            ActionDataFile = '%s/train_2016_v2.csv' % InputDir
            OutputFile = '%s/train.pkl' % OutputDir
        else:
            ActionDataFile = '%s/sample_submission.csv' % InputDir
            OutputFile = '%s/test.pkl' % OutputDir

        print(OutputFile)

        PropertyDataFile = '%s/properties_2016.csv' % InputDir

        ## load
        # NOTE(review): the same date column and join key are used for both
        # modes, so the submission file must also provide 'transactiondate'
        # and 'parcelid' columns -- confirm against the actual test input.
        ActionData = pd.read_csv(ActionDataFile, parse_dates=['transactiondate'])
        PropertyData = pd.read_csv(PropertyDataFile)

        ## left join: keep every action row, even without a matching property
        MergedData = ActionData.merge(PropertyData, how='left', on='parcelid')

        ## output into pkl file
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(OutputDir, exist_ok=True)
        # The with-statement closes the file; no explicit close() is needed.
        with open(OutputFile, 'wb') as o_file:
            pickle.dump(MergedData, o_file, pickle.HIGHEST_PROTOCOL)

    ## split rawcensustractandblock into census, tract and block
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号