def ohEncoding(data, cols=None, replace=False):
    """One-hot encode selected columns of a DataFrame via a DictVectorizer.

    Relies on ``pd`` (pandas) and ``DictVectorizer`` (scikit-learn) being
    imported at file level.

    Args:
        data: DataFrame holding the columns to encode.
        cols: Column labels to encode. When None, every column whose dtype
            compares equal to ``'object'`` is selected and the detected list
            is printed.
        replace: Only when exactly ``True``, the returned frame has ``cols``
            dropped and the encoded columns joined in; otherwise ``data`` is
            returned as it was passed in.

    Returns:
        A ``(data, vecData, vec)`` tuple: the (possibly replaced) frame, the
        encoded columns as a DataFrame sharing ``data``'s index, and the
        fitted vectorizer.
    """
    if cols is None:
        # ``items()`` instead of ``iteritems()``: the latter was removed in
        # pandas 2.0.
        cols = [name for name, dtype in data.dtypes.items() if dtype == 'object']
        # A single parenthesized argument prints identically on Python 2 and 3.
        print("Categorical features not set, detected as categorical: %s" % str(cols))

    def row_to_dict(row):
        # One {column: value} mapping per row, the input shape the
        # vectorizer is fitted on.
        return dict((col, row[col]) for col in cols)

    vec = DictVectorizer()
    encoded = vec.fit_transform(data[cols].apply(row_to_dict, axis=1)).toarray()

    # ``feature_names_`` is the fitted attribute holding the output column
    # names; ``get_feature_names()`` was removed in scikit-learn 1.2.
    vecData = pd.DataFrame(encoded, columns=vec.feature_names_, index=data.index)

    # Strict identity check kept on purpose: truthy non-True values such as
    # ``1`` do not trigger replacement, exactly as before.
    if replace is True:
        data = data.drop(cols, axis=1)
        data = data.join(vecData)
    return data, vecData, vec
评论列表
文章目录