def onehot_encode(tr, te, cols=None):
    """One-hot encode the selected columns of a train and a test DataFrame.

    Every value in the selected columns is converted to ``str`` and each row
    is handed to a ``DictVectorizer`` as a ``{column: value}`` mapping. The
    vectorizer is fitted on ``tr`` only and then applied to ``te``.

    Args:
        tr: Training DataFrame. It is not modified.
        te: Test DataFrame. It is not modified.
        cols: Columns to encode. When ``None``, the columns of ``tr`` that
            also appear in ``te`` are used, in ``tr``'s column order.

    Returns:
        A tuple ``(X, Xt)``: the encoded training rows and the encoded test
        rows, each with one row per input row, as produced by
        ``DictVectorizer.fit_transform`` / ``DictVectorizer.transform``.
    """
    if cols is None:
        te_columns = set(te.columns)
        cols = [col for col in tr.columns if col in te_columns]
    else:
        cols = list(cols)

    def as_string_records(frame):
        # Stringify a copy so the caller's DataFrame keeps its dtypes.
        subset = frame[cols].copy()
        for col in cols:
            subset[col] = subset[col].map(str)
        # orient="records" yields one dict per row regardless of the index;
        # going through .T.to_dict() keys rows by index label and silently
        # drops rows whose labels repeat.
        return subset.to_dict(orient="records")

    tr_records = as_string_records(tr)
    te_records = as_string_records(te)

    vec = DictVectorizer()
    print("start fitting")
    X = vec.fit_transform(tr_records)
    Xt = vec.transform(te_records)
    print("done fitting", X.shape, Xt.shape)
    return X, Xt
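# 评论列表 (comment list) - web-page navigation residue, not part of the code
# 文章目录 (table of contents) - web-page navigation residue, not part of the code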