def build(self):
    """Build one-hot character-position features for the train and test sets.

    Every value of the ``height``, ``weight``, ``ap_hi`` and ``ap_lo``
    columns is converted with ``str``.  For each character position ``n`` two
    features are produced: the character code counted from the left
    (``ft_l_<column>_<n>``) and counted from the right
    (``ft_r_<column>_<n>``, where ``n == 0`` is the last character).
    Positions beyond the end of a string are encoded as ``-1``.  The codes
    are then one-hot encoded with ``pd.get_dummies``; none of the input
    columns are kept in the result.

    Train and test are concatenated before encoding, so both parts end up
    with the same dummy columns, and are split back by row position.

    Returns:
        A ``(train, test, None)`` tuple.  The two frames are also stored on
        ``self.train_`` and ``self.test_``.
    """

    def char_code(text, pos, from_right=False):
        # Code of the character at offset ``pos``; -1 if the string is shorter.
        if pos >= len(text):
            return -1
        # Offset 0 from the right is the last character, i.e. index -1.
        # A plain ``text[-pos]`` would yield the *first* character for
        # pos == 0 and never reach the last one.
        return ord(text[-pos - 1] if from_right else text[pos])

    # The second and fourth items returned by data.get() are not used here.
    train, _, test, _ = data.get()
    ntrain = len(train)
    df = pd.concat([train, test], axis=0)

    cset = []
    codes = {}
    for sc in ['height', 'weight', 'ap_hi', 'ap_lo']:
        tc = df[sc].apply(str)
        texts = tc.tolist()
        maxc = tc.apply(len).max()
        for n in range(maxc):
            left_name = 'ft_l_' + sc + '_' + str(n)
            right_name = 'ft_r_' + sc + '_' + str(n)
            codes[left_name] = [char_code(t, n) for t in texts]
            codes[right_name] = [char_code(t, n, from_right=True) for t in texts]
            cset.append(left_name)
            cset.append(right_name)

    # Build the features in their own frame (plain lists, so no index
    # alignment is involved) and encode every column of it; the result holds
    # only dummy columns, in the order given by ``cset``.
    features = pd.DataFrame(codes, index=df.index, columns=cset)
    df = pd.get_dummies(features, columns=cset)

    # Split back by position: the first ``ntrain`` rows came from ``train``.
    self.train_ = df.iloc[:ntrain]
    self.test_ = df.iloc[ntrain:]
    return self.train_, self.test_, None
评论列表
文章目录