def gen_features(train, y, test):
    """Add derived and sanity-flag columns to the train and test frames.

    ``train`` and ``test`` are stacked so every feature is computed in one
    pass, then split back at the original train length. The inputs are not
    modified; the returned frames are copies that keep their original index
    labels.

    Both frames must provide the columns ``ap_hi``, ``ap_lo``, ``height`` and
    ``weight``.
    NOTE(review): ``height / 100`` suggests height is in centimetres and
    weight in kilograms -- confirm against the data source.

    Columns added:
        ap_diff       -- ``ap_hi - ap_lo``
        BWI           -- ``weight / (height / 100) ** 2``
        bad_bwi       -- 1 where BWI > 60 or BWI < 10, else 0
        bad_height    -- 1 where height < 130, else 0
        bad_weight    -- 1 where weight + 120 < height, else 0
        bad_ap_hi     -- 1 where ap_hi < 80 or ap_hi > 220, else 0
        bad_ap_lo     -- 1 where ap_lo < 40 or ap_lo > 200, else 0
        has_bad_data  -- True where any of the ``bad_*`` flags is set

    Args:
        train: training feature frame.
        y: training target; returned unchanged.
        test: test feature frame.

    Returns:
        Tuple ``(train_features, y, test_features)``.
    """
    ntrain = len(train)
    # pd.concat builds a new frame, so the caller's inputs stay untouched.
    df_all = pd.concat([train, test])

    df_all['ap_diff'] = df_all['ap_hi'] - df_all['ap_lo']

    height_m = df_all['height'] / 100
    df_all['BWI'] = df_all['weight'] / (height_m * height_m)

    bwi = df_all['BWI']
    df_all['bad_bwi'] = ((bwi > 60) | (bwi < 10)).astype(int)
    df_all['bad_height'] = (df_all['height'] < 130).astype(int)
    df_all['bad_weight'] = (df_all['weight'] + 120 < df_all['height']).astype(int)

    # Plain boolean masks replace the former ``.ix`` assignments; ``.ix`` was
    # removed from pandas in 1.0.
    ap_hi = df_all['ap_hi']
    df_all['bad_ap_hi'] = ((ap_hi < 80) | (ap_hi > 220)).astype(int)
    ap_lo = df_all['ap_lo']
    df_all['bad_ap_lo'] = ((ap_lo < 40) | (ap_lo > 200)).astype(int)

    flag_columns = ['bad_bwi', 'bad_height', 'bad_weight', 'bad_ap_hi', 'bad_ap_lo']
    df_all['has_bad_data'] = df_all[flag_columns].sum(axis=1) > 0

    # Positional split: index labels may repeat between train and test.
    train_out = df_all.iloc[:ntrain].copy()
    test_out = df_all.iloc[ntrain:].copy()
    return train_out, y, test_out
评论列表
文章目录