utils_data_cleaning.py 文件源码

python
阅读 25 收藏 0 点赞 0 评论 0

项目:auto_ml 作者: ClimbsRocks 项目源码 文件源码
def add_date_features_df(col_data, date_col):
    """Derive calendar and time-of-day features from a column of dates.

    Args:
        col_data: The date values; converted with ``pd.to_datetime`` before
            use. Presumably a pandas Series, since ``.apply`` is called on
            the converted result -- verify against callers.
        date_col: Name of the source column, used as the prefix of every
            generated feature name.

    Returns:
        A dict mapping feature name to the computed values. Always holds
        ``<date_col>_day_of_week`` and ``<date_col>_is_weekend``. When the
        values expose hour/minute information it also holds
        ``<date_col>_hour``, ``<date_col>_minutes_into_day`` and
        ``<date_col>_day_part``. Missing values are filled with 0.
    """
    day_of_week_key = date_col + '_day_of_week'
    hour_key = date_col + '_hour'
    minutes_key = date_col + '_minutes_into_day'
    weekend_key = date_col + '_is_weekend'
    day_part_key = date_col + '_day_part'

    result = {}

    col_data = pd.to_datetime(col_data)

    # astype takes a different keyword for "do not raise on a failed cast"
    # depending on the pandas release, so pick the right one once up front.
    # NOTE(review): if pandas_version is a plain str this comparison is
    # lexicographic rather than numeric -- confirm where it is defined.
    if pandas_version < '0.20.0':
        astype_kwargs = {'raise_on_error': False}
    else:
        astype_kwargs = {'errors': 'ignore'}

    result[day_of_week_key] = col_data.apply(lambda x: x.weekday()).astype(int, **astype_kwargs)

    # Time-of-day features are best effort: values without hour/minute
    # attributes simply do not get them.
    try:
        result[hour_key] = col_data.apply(lambda x: x.hour).astype(int, **astype_kwargs)
        result[minutes_key] = col_data.apply(lambda x: x.hour * 60 + x.minute)

        result[hour_key] = result[hour_key].fillna(0)
        result[minutes_key] = result[minutes_key].fillna(0)
    except AttributeError:
        pass

    result[weekend_key] = col_data.apply(lambda x: x.weekday() in (5, 6))

    # The day part is derived from minutes_into_day, so it can only be built
    # when the block above succeeded; otherwise the lookup would raise a
    # KeyError and defeat the best-effort handling.
    if minutes_key in result:
        result[day_part_key] = result[minutes_key].apply(minutes_into_day_parts)
        result[day_part_key] = result[day_part_key].fillna(0)

    result[day_of_week_key] = result[day_of_week_key].fillna(0)
    result[weekend_key] = result[weekend_key].fillna(0)
    return result

# Same logic as above, except implemented for a single dictionary, which is much faster at prediction time when getting just a single prediction
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号