Preprocess.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:kaggle 作者: RankingAI 项目源码 文件源码
def __ParseCTB(cls, InputDir, OutputDir, mode):
        """Split the raw census tract/block code into FIPS, tract and block ids.

        Loads the pickled DataFrame ``<InputDir>/train.pkl`` (when ``mode`` is
        ``'train'``) or ``<InputDir>/test.pkl`` (for any other ``mode``),
        derives three string columns from ``rawcensustractandblock``, drops
        the source columns and pickles the result under the same file name
        in ``OutputDir``.

        Columns added (all converted with ``astype(str)``):
            fipsid  -- integer part of ``code / 10**7``
            tractid -- integer part of ``(code % 10**7) / 10``
            blockid -- ``(code % 10**7) % 10``
        where ``code`` is ``rawcensustractandblock * 1000`` truncated to int64.

        Columns removed: ``fips``, ``rawcensustractandblock`` and the
        temporary ``tractandblock``.

        Args:
            InputDir: directory containing the input pickle.
            OutputDir: directory for the output pickle; created if missing.
            mode: ``'train'`` selects ``train.pkl``; anything else selects
                ``test.pkl``.

        Returns:
            None. The result is written to disk.
        """
        # Any mode other than 'train' falls back to the test file.
        file_name = 'train.pkl' if mode == 'train' else 'test.pkl'
        InputFile = '%s/%s' % (InputDir, file_name)
        OutputFile = '%s/%s' % (OutputDir, file_name)

        ## load
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only feed this files produced by a trusted pipeline step.
        with open(InputFile, 'rb') as i_file:
            df_data = pickle.load(i_file)

        ## extract census, tract and block identifiers
        # Scale by 1000 and truncate so the leading digits can be split with
        # integer arithmetic below.
        df_data['rawcensustractandblock'] = (df_data['rawcensustractandblock'] * 1000).astype(np.float64).astype(np.int64)
        df_data['fipsid'] = ((df_data['rawcensustractandblock'] / 10000000).astype(np.int64)).astype(str)
        # Temporary column holding the low seven digits (tract + block).
        df_data['tractandblock'] = df_data['rawcensustractandblock'] % 10000000
        df_data['tractid'] = ((df_data['tractandblock'] / 10).astype(np.int64)).astype(str)
        df_data['blockid'] = ((df_data['tractandblock'] % 10).astype(np.int64)).astype(str)
        df_data.drop(['fips', 'rawcensustractandblock', 'tractandblock'], axis = 1, inplace = True)

        ## output into pkl file
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(OutputDir, exist_ok = True)
        with open(OutputFile, 'wb') as o_file:
            # -1 selects the highest pickle protocol available.
            pickle.dump(df_data, o_file, -1)

        return
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号