def add_date_features_df(col_data, date_col):
    """Derive calendar and time-of-day features from a column of dates.

    ``col_data`` is converted with ``pd.to_datetime`` and every derived
    column is stored in a dict under ``date_col`` plus a suffix:

    * ``_day_of_week``      -- ``weekday()`` of each value (Monday is 0)
    * ``_hour``             -- ``hour`` of each value
    * ``_minutes_into_day`` -- ``hour * 60 + minute`` of each value
    * ``_is_weekend``       -- True when ``weekday()`` is 5 or 6
    * ``_day_part``         -- ``minutes_into_day_parts`` applied to
      ``_minutes_into_day``

    Missing values in each derived column are replaced with 0.

    The hour/minute features are best-effort: if reading them raises
    ``AttributeError`` they are skipped, and ``_day_part`` (which is built
    from ``_minutes_into_day``) is skipped with them rather than failing
    with a ``KeyError``.

    Args:
        col_data: The date values. Must still offer ``.apply`` after
            ``pd.to_datetime`` -- presumably a pandas Series; confirm
            against callers.
        date_col: Column name used as the prefix of every returned key.

    Returns:
        dict mapping feature name to the derived column.
    """
    result = {}
    col_data = pd.to_datetime(col_data)

    # ``astype`` spells "return the data unchanged if the cast fails"
    # differently before and after pandas 0.20.0, so pick the keyword once.
    # NOTE(review): this is a plain ``<`` against a string literal; if
    # pandas_version is a str the comparison is lexicographic -- confirm that
    # is acceptable for the versions this project supports.
    if pandas_version < '0.20.0':
        astype_kwargs = {'raise_on_error': False}
    else:
        astype_kwargs = {'errors': 'ignore'}

    day_of_week_key = date_col + '_day_of_week'
    hour_key = date_col + '_hour'
    minutes_key = date_col + '_minutes_into_day'
    weekend_key = date_col + '_is_weekend'
    day_part_key = date_col + '_day_part'

    result[day_of_week_key] = col_data.apply(
        lambda x: x.weekday()
    ).astype(int, **astype_kwargs)

    try:
        hours = col_data.apply(lambda x: x.hour).astype(int, **astype_kwargs)
        minutes_into_day = col_data.apply(lambda x: x.hour * 60 + x.minute)
    except AttributeError:
        # Best effort: without hour/minute information we simply emit no
        # time-of-day features.
        pass
    else:
        # Store both together so a failure never leaves a half-built state.
        result[hour_key] = hours.fillna(0)
        result[minutes_key] = minutes_into_day.fillna(0)

    result[weekend_key] = col_data.apply(lambda x: x.weekday() in (5, 6))

    # Only derive the day part when its input feature was actually produced;
    # indexing result[minutes_key] unconditionally would raise KeyError right
    # after the AttributeError above had been deliberately swallowed.
    if minutes_key in result:
        result[day_part_key] = result[minutes_key].apply(minutes_into_day_parts)

    result[day_of_week_key] = result[day_of_week_key].fillna(0)
    result[weekend_key] = result[weekend_key].fillna(0)
    if day_part_key in result:
        result[day_part_key] = result[day_part_key].fillna(0)

    return result
# Same logic as above, except implemented for a single dictionary, which is much faster at prediction time when getting just a single prediction
评论列表
文章目录