def load_data(in_file):
    """Read the prepared CSV and return a cleaned, one-hot encoded DataFrame.

    Per the original author's note, the input is the CSV written by the
    freddie_data_analysis module.

    Processing steps, in order:
      1. Drop every column whose name contains 'Unnamed' (the index
         columns pandas names that way when a CSV header cell is empty).
      2. Drop the 'published_date' column.
      3. Replace +inf / -inf with NaN, then fill every NaN with 0.
      4. One-hot encode 'property_state' (prefix 'state') and
         'special_servicer' (prefix 'ss').

    Args:
        in_file: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        The prepared ``pandas.DataFrame``. The remaining original columns
        come first, followed by the 'state_*' and then the 'ss_*' columns.

    Raises:
        KeyError: If 'published_date', 'property_state' or
            'special_servicer' is missing from the file.
    """
    # read csv file prepared by freddie_data_analysis module
    df = pd.read_csv(in_file)

    # Drop unneeded columns in one pass. 'published_date' is discarded
    # without being parsed first: converting it to datetime only to drop
    # it on the next line was wasted work.
    unneeded = [col for col in df.columns if 'Unnamed' in col]
    unneeded.append('published_date')
    df = df.drop(columns=unneeded)

    # Infinities are treated as missing; every missing value becomes 0.
    # NOTE: this also applies to the categorical columns below, so a
    # missing state/servicer is encoded as its own category for the value 0.
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    # One-hot encode the categorical columns; prefixes pair up with columns
    # positionally, and dummies are appended in the order listed.
    df = pd.get_dummies(
        df,
        prefix=['state', 'ss'],
        columns=['property_state', 'special_servicer'],
    )

    # return prepared dataframe
    return df
mf_mrtg_default_model.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录