def clean_data(DT_df, attributes):
    """Cast the required columns to numeric dtypes and keep only those columns.

    The casts are applied to a copy of the selected columns, so the frame
    passed in by the caller is left unmodified.

    Args:
        DT_df: pandas DataFrame containing every column named in
            ``attributes`` plus the fixed amount/indicator columns listed
            below and ``xtra_card_nbr``.
        attributes: Column name(s) to cast to ``int``. Any iterable of names
            is accepted; a single string is treated as one column name.

    Returns:
        A new DataFrame whose columns are ``attributes`` followed by the
        fixed columns, in that order. ``xtra_card_nbr`` is carried through
        without conversion.

    Raises:
        KeyError: If a required column is missing from ``DT_df``.
        ValueError: Propagated from pandas when a value cannot be converted
            to the target numeric type.
    """
    float_cols = [
        "fs_scan_amt_pre",
        "fs_scan_amt_pos",
        "fs_scan_amt_pos_PF",
        "dyn_margin_amt_pre",
        "dyn_margin_amt_pos",
        "dyn_margin_amt_pos_PF",
    ]
    int_cols = ["ctl_grp_ind", "mailer_version_id"]

    # Normalise to a list so tuples, generators, or a lone column name work.
    if isinstance(attributes, str):
        attributes = [attributes]
    else:
        attributes = list(attributes)

    fields = attributes + float_cols + int_cols + [
        "tcm_redeem_md",
        "xtra_card_nbr",
    ]

    # Work on an explicit copy of just the needed columns: this keeps the
    # caller's frame untouched and avoids SettingWithCopyWarning. Names are
    # de-duplicated here so each column assignment below is unambiguous.
    cleaned = DT_df[list(dict.fromkeys(fields))].copy()

    for col in float_cols:
        cleaned[col] = cleaned[col].astype(float)
    for col in int_cols:
        cleaned[col] = cleaned[col].astype(int)
    cleaned["tcm_redeem_md"] = pd.to_numeric(cleaned["tcm_redeem_md"])
    # Attributes are cast last so that, if one overlaps a fixed column, the
    # int cast is the one that wins.
    for attr in attributes:
        cleaned[attr] = cleaned[attr].astype(int)

    return cleaned[fields]
评论列表
文章目录