def build(self):
    """Build one-hot encoded K-Means cluster-membership features.

    The train and test frames returned by ``data.get()`` are stacked,
    K-Means is fitted on the stacked rows once per cluster count
    (2, 5, 10, 15, 25), and each row's cluster label is one-hot encoded.
    Only the generated ``kmeans_<n>_<label>`` indicator columns are
    returned; none of the input columns are included.

    Cluster assignments depend on K-Means initialisation, and no
    ``random_state`` is set here, so results can differ between runs.

    Returns:
        A ``(train, test, None)`` tuple. ``train`` holds the first
        ``len(train)`` rows of the encoded features and ``test`` the
        remaining rows, both cast to ``int32``. The third element is
        always ``None`` (NOTE(review): presumably a placeholder slot the
        caller expects -- confirm against the caller).
    """
    # The 2nd and 4th values returned by data.get() are not used here.
    train, _target, test, _ = data.get()
    ntrain = len(train)
    features = pd.concat([train, test], axis=0)

    # Fit every clustering on the untouched input columns. Writing the
    # labels back into the frame being clustered would feed earlier cluster
    # ids into the later fits as if they were ordinary numeric features.
    labels = pd.DataFrame(index=features.index)
    for n in [2, 5, 10, 15, 25]:
        labels['kmeans_' + str(n)] = cluster.KMeans(n_clusters=n).fit_predict(features)

    encoded = pd.get_dummies(labels, columns=list(labels.columns))

    # astype() returns new frames, so the two results are independent of
    # `encoded` and of each other without an explicit copy().
    train_out = encoded.iloc[:ntrain].astype('int32')
    test_out = encoded.iloc[ntrain:].astype('int32')
    return train_out, test_out, None
评论列表
文章目录