def Xy_matrix(df_qual_and_race, columns, df_wet):
    """Assemble the combined feature/target frame for one target column.

    The function reads two module-level names that are not parameters:
    ``y_label`` (name of the target column) and ``d`` (a mapping that must
    contain a ``'pitStops'`` DataFrame).

    Steps, in order:
      1. Keep only ``columns`` of ``df_qual_and_race`` and drop rows whose
         ``y_label`` or ``q_min`` value is null.
      2. Select the fixed feature columns plus ``y_label`` and replace
         ``dob`` with integer ``birth_year`` / ``birth_mo`` columns.
      3. Left-join ``df_wet`` (without its ``circuitId`` column) on
         ``year`` and ``round``.
      4. Left-join the summed pit-stop ``milliseconds`` per
         (``raceId``, ``driverId``) pair.
      5. Replace every remaining null with 0.

    Args:
        df_qual_and_race: DataFrame holding at least the columns named in
            ``columns``.
        columns: Column labels to take from ``df_qual_and_race``. Must
            include ``q_min``, ``position_qual``, ``raceId``, ``circuitId``,
            ``driverId``, ``year``, ``round``, ``dob`` and ``y_label``.
        df_wet: DataFrame with ``year``, ``round`` and ``circuitId`` columns
            plus whatever extra columns should be joined in.

    Returns:
        A new DataFrame; none of the inputs are modified.

    Note:
        Every ``dob`` value must expose ``.year`` and ``.month`` (e.g. a
        pandas Timestamp). A missing date (NaT) raises ``ValueError`` from
        ``int()``, as the per-element conversion always did.
    """
    df_q_r_out = df_qual_and_race.loc[:, columns].reset_index(drop=True)

    # Rows without a target or without a qualifying time cannot be used.
    has_target = df_q_r_out[y_label].notnull()
    has_q_min = df_q_r_out['q_min'].notnull()
    df_q_r_out = df_q_r_out[has_target & has_q_min].reset_index(drop=True)

    # Explicit copy so the column assignments below never write into a view.
    X = df_q_r_out.loc[:, ['q_min', 'position_qual', 'raceId', 'circuitId',
                           'driverId', 'year', 'round', 'dob',
                           y_label]].copy()

    # Birth year / month. Lists are built eagerly: on Python 3 a bare map()
    # is a lazy iterator with no length and cannot be assigned as a column.
    X['birth_year'] = [int(dob.year) for dob in df_q_r_out['dob']]
    X['birth_mo'] = [int(dob.month) for dob in df_q_r_out['dob']]
    X = X.drop('dob', axis=1)

    # Weather features. circuitId is dropped from df_wet so the merge does
    # not produce suffixed duplicates of the circuitId column already in X.
    X = X.merge(df_wet.drop(['circuitId'], axis=1),
                how='left', on=['year', 'round'])

    # Total pit-stop time per driver per race.
    df_pits = d['pitStops'].groupby(
        ['raceId', 'driverId'], as_index=False)['milliseconds'].sum()
    X_y = X.merge(df_pits, how='left', on=['raceId', 'driverId'])

    # Rows with no match in either left join get 0 rather than NaN.
    return X_y.fillna(0)
评论列表
文章目录