def fit():
    """Build the feature-selection pipelines and incrementally train ``clf``.

    The frames returned by ``generate()`` are wrapped with
    ``dd.from_pandas(..., npartitions=10)``, the features are pushed through
    ``pre_pipe.fit_transform``, and every partition of the transformed
    features (paired with the same-numbered partition of ``y``) is passed to
    ``clf.partial_fit`` five times.

    Returns:
        A ``(pipe, clf, sfm)`` tuple where ``pipe`` is the
        ``SelectFromModel(pre_pipe)`` + ``GradientBoostingRegressor``
        pipeline (constructed here but not fitted by this function),
        ``clf`` is the model updated through ``partial_fit``, and ``sfm`` is
        ``SelectFromModel(clf, prefit=True)``.
    """
    X, y = generate()
    dX = dd.from_pandas(X, npartitions=10)
    y = dd.from_pandas(y, npartitions=10)

    pre_pipe = make_pipeline(
        CategoricalEncoder(),
        DummyEncoder(),
        Imputer(),
        SGDRegressor(),
    )
    pipe = make_pipeline(
        SelectFromModel(pre_pipe),
        GradientBoostingRegressor(),
    )

    # NOTE(review): pre_pipe ends in an estimator (SGDRegressor) and is fitted
    # here without a target; confirm this is supported by the library
    # versions in use.
    X_ = pre_pipe.fit_transform(dX)

    # NOTE(review): ``clf`` is never assigned inside this function, so it must
    # be provided at module scope; otherwise the loop raises NameError.
    for i in range(X_.npartitions):
        # Materialise each partition once per outer iteration: the data only
        # depends on ``i``, so recomputing it for every pass would repeat the
        # same work five times.
        X_sub = X_.get_partition(i).compute()
        y_sub = y.get_partition(i).compute()
        for j in range(5):
            print(i, j)
            clf.partial_fit(X_sub, y_sub)

    sfm = SelectFromModel(clf, prefit=True)
    return pipe, clf, sfm
评论列表
文章目录