def __ParseCTB(cls, InputDir, OutputDir, mode):
    """Split ``rawcensustractandblock`` into FIPS, tract and block id columns.

    Loads the pickled DataFrame ``<InputDir>/train.pkl`` when ``mode`` is
    ``'train'`` and ``<InputDir>/test.pkl`` for any other value, derives the
    string columns ``fipsid``, ``tractid`` and ``blockid`` from
    ``rawcensustractandblock``, drops the ``fips`` and
    ``rawcensustractandblock`` columns, and pickles the result under the same
    file name in ``OutputDir``.

    Args:
        cls: Unused by this function.
        InputDir: Directory containing the input pickle file.
        OutputDir: Directory for the output pickle file; created (including
            parents) if it does not exist.
        mode: ``'train'`` selects ``train.pkl``; anything else selects
            ``test.pkl``.

    Returns:
        None. The result is written to disk.

    Raises:
        KeyError: If ``rawcensustractandblock`` or ``fips`` is not a column
            of the loaded frame.
    """
    file_name = 'train.pkl' if mode == 'train' else 'test.pkl'
    InputFile = '%s/%s' % (InputDir, file_name)
    OutputFile = '%s/%s' % (OutputDir, file_name)

    ## load
    # NOTE(security): pickle.load can execute arbitrary code; only use this on
    # files produced by a trusted step of the pipeline.
    with open(InputFile, 'rb') as i_file:
        df_data = pickle.load(i_file)

    ## extract census, tract and block identifiers
    # Scale by 1000 and truncate to an integer: every digit after the third
    # decimal place of the raw value is intentionally discarded.
    df_data['rawcensustractandblock'] = (
        (df_data['rawcensustractandblock'] * 1000).astype(np.float64).astype(np.int64)
    )
    # Integer floor division keeps the arithmetic in int64 instead of going
    # through float division and casting back.
    # Everything above the lowest 7 digits of the scaled value is the FIPS id.
    df_data['fipsid'] = (df_data['rawcensustractandblock'] // 10000000).astype(str)
    # The lowest 7 digits hold the tract id followed by a single block digit.
    df_data['tractandblock'] = df_data['rawcensustractandblock'] % 10000000
    df_data['tractid'] = (df_data['tractandblock'] // 10).astype(str)
    df_data['blockid'] = (df_data['tractandblock'] % 10).astype(str)
    df_data.drop(['fips', 'rawcensustractandblock', 'tractandblock'], axis=1, inplace=True)

    ## output into pkl file
    # exist_ok avoids the check-then-create race of testing for the directory
    # before calling makedirs.
    os.makedirs(OutputDir, exist_ok=True)
    with open(OutputFile, 'wb') as o_file:
        pickle.dump(df_data, o_file, -1)
    return
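# Web-page navigation residue (originally "评论列表" / "文章目录", i.e. "Comment list" / "Table of contents"); not part of the code.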