def preprocess(file,istrian):
    """Load a CSV of park observations and add calendar and wind features.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``. The data
            must contain a ``Date`` column (parsed day-first) and a
            ``Direction_Of_Wind`` column.
        istrian: Truthy when ``file`` holds training data (the parameter
            name is kept as-is for existing callers). In that case the
            ``Footfall`` column is split off and returned as the outcome.

    Returns:
        A ``(df, outcome)`` tuple. ``df`` is the loaded frame with missing
        values replaced by ``-1`` and the extra columns ``month``, ``date``,
        ``weekday``, ``sardiya`` and ``Direction_Of_Wind2``. ``outcome`` is
        the ``Footfall`` series when ``istrian`` is truthy, else ``np.nan``.
    """
    df = pd.read_csv(file, parse_dates=['Date'], dayfirst=True)

    # Every missing value, in every column, becomes the sentinel -1.
    df = df.fillna(-1)

    if istrian:
        outcome = df['Footfall']
        df = df.drop(['Footfall'], axis=1)
    else:
        # No target is available for non-training data.
        outcome = np.nan

    # Vectorised date parts; cast to int64 so the column dtypes match what
    # the previous per-row ``apply`` implementation produced.
    dates = df['Date'].dt
    df['month'] = dates.month.astype('int64')
    df['date'] = dates.day.astype('int64')
    df['weekday'] = dates.weekday.astype('int64')

    # Flag rows whose month is one of Nov-Mar.
    # NOTE(review): "sardiya" presumably means winter - confirm.
    df['sardiya'] = df['month'].isin([1, 2, 3, 11, 12]).astype('int64')

    # Day-of-month is remapped by a helper defined elsewhere in this file.
    df['date'] = df['date'].apply(get_normal_date)

    # NOTE(review): the previous version built one-hot encodings of Park_ID
    # and Location_Type with pd.get_dummies but discarded both results (the
    # second overwrote the first and neither was attached to ``df``). The
    # dead computation is removed here; if those features are wanted, join
    # them explicitly, e.g. pd.concat([df, dummies], axis=1).

    # Wind direction is transformed by a helper defined elsewhere in this file.
    df['Direction_Of_Wind2'] = df['Direction_Of_Wind'].apply(get_wind_dir)
    return df, outcome
# Load the training set
# [web-page residue] Comment list
# [web-page residue] Article table of contents