def district_hash_map(data_frame):
    """Map district hashes to their mapped values and strip "_hash" from labels.

    Every column whose label contains "district_hash" has its values passed
    through ``replace`` with the mapping unpickled from "cluster_map.pickle";
    values that are not keys of that mapping are left as they are. The column
    is then renamed with every "_hash" substring removed from its label.

    Non-string column labels are skipped rather than raising ``TypeError`` on
    the substring test.

    Args:
        data_frame: DataFrame to convert (accessed through ``.columns``,
            column assignment and ``.rename``); it is modified in place.

    Returns:
        The same ``data_frame`` object, after mapping and renaming.
    """
    district_map_f = "cluster_map.pickle"
    district_map_f_path = os.path.join(
        DATA_DIR, CONCRETE_DIR, CLUSTER_MAP_SHEET_DIR, district_map_f
    )
    # Build the mapping file on first use.
    # NOTE(review): presumably create_hash_district_map_dict() writes
    # district_map_f_path -- confirm, otherwise the open() below fails.
    if not os.path.exists(district_map_f_path):
        create_hash_district_map_dict()

    # NOTE: pickle.load can execute arbitrary code while unpickling; this file
    # must be a trusted, locally generated artifact.
    with open(district_map_f_path, "rb") as f:
        map_rule = pickle.load(f)

    # Decide up front which columns to touch, so the in-place rename below
    # never interleaves with iteration over data_frame.columns.
    renamed = {
        col: col.replace("_hash", "")
        for col in data_frame.columns
        if isinstance(col, str) and "district_hash" in col
    }

    # Translate the hash values while the columns still carry their old labels.
    for col in renamed:
        data_frame[col] = data_frame[col].replace(map_rule)

    # After mapping, drop the "_hash" marker from the labels in a single call.
    if renamed:
        data_frame.rename(columns=renamed, inplace=True)
    return data_frame
## Provide the directory whose hashes you want to map
评论列表
文章目录