def OHETr(self, tr):
    """One-hot encode every column of ``tr`` and return the encoding maps.

    For each column, the distinct values reported by ``value_counts()``
    (its default ordering, which also skips nulls) are rendered as
    ``str(int(value))`` and a trailing ``'missing'`` category is appended.
    Each category string is mapped to its position in that list, and
    ``self.__ApplyOH`` turns the column's raw values into the indicator
    matrix for that mapping.

    Args:
        tr: pandas DataFrame to encode. Every non-null value must be
            accepted by ``int()``; otherwise the conversion raises.

    Returns:
        A ``(encoded, OHEDict)`` tuple. ``encoded`` holds only the
        generated indicator columns, named ``'<column>_<category>'`` and
        grouped in the order of the original columns; it shares the index
        of ``tr``. ``OHEDict`` maps each original column name to its
        ``{category: position}`` dict. If ``tr`` has no columns it is
        returned unchanged together with an empty dict.
    """
    ohe_dict = {}
    encoded_frames = []
    for col in tr.columns:
        categories = [str(int(v)) for v in tr[col].value_counts().index.values]
        categories.append('missing')
        # If two raw values collapse to the same string, the later position
        # wins, exactly as when building a dict from (key, position) pairs.
        selected_values = {k: pos for pos, k in enumerate(categories)}
        # The result must have one row per entry of tr.index and one column
        # per key of selected_values, or the DataFrame constructor raises.
        one_hot = self.__ApplyOH(tr[col].values, selected_values)
        # Headers follow the mapped position, not dict insertion order.
        ordered_keys = sorted(selected_values, key=selected_values.get)
        headers = ['%s_%s' % (col, k) for k in ordered_keys]
        encoded_frames.append(
            pd.DataFrame(one_hot, index=tr.index, columns=headers)
        )
        ohe_dict[col] = selected_values
    if not encoded_frames:
        # Nothing to encode; pd.concat would raise on an empty list.
        return tr, ohe_dict
    # Always reading from the untouched input and concatenating once keeps a
    # generated header (e.g. 'a_1') from clashing with a source column of the
    # same name, and avoids re-copying the whole frame on every iteration.
    return pd.concat(encoded_frames, axis=1), ohe_dict
评论列表
文章目录