def mungeskewed(train, test, numeric_feats):
    """Stack train and test and Box-Cox transform the skewed numeric columns.

    The two frames are concatenated (train rows first) with a fresh 0..n-1
    index.  Skewness is measured on the *train* rows only, ignoring NaNs;
    every column in ``numeric_feats`` whose skewness exceeds 0.25 is shifted
    by +1 and replaced by its Box-Cox transform, fitted on the combined
    train+test values.

    Neither ``train`` nor ``test`` is modified: the placeholder ``loss``
    column is added to a copy of ``test``.

    Args:
        train: Training DataFrame containing the ``numeric_feats`` columns.
        test: Test DataFrame; it receives a ``loss`` column filled with 0 in
            the combined result (presumably so that it lines up with a
            ``loss`` target column in ``train`` -- confirm against caller).
        numeric_feats: Column labels to examine for skewness.

    Returns:
        A ``(train_test, ntrain)`` tuple: the combined, transformed
        DataFrame and the number of rows that came from ``train``, so the
        caller can split the frame back apart.
    """
    ntrain = train.shape[0]

    # Build the placeholder column on a copy instead of writing into the
    # caller's DataFrame.
    test_with_loss = test.assign(loss=0)
    train_test = pd.concat((train, test_with_loss)).reset_index(drop=True)

    skewness = train[numeric_feats].apply(lambda col: skew(col.dropna()))
    skewed_cols = skewness[skewness > 0.25].index

    for col in skewed_cols:
        # Box-Cox only accepts strictly positive data; the +1 shift keeps
        # zero-valued entries valid (assumes feature values are >= 0).
        shifted = train_test[col] + 1
        train_test[col], _ = boxcox(shifted)

    return train_test, ntrain
评论列表
文章目录