def __ApplyOHE(cls, data, d_feat):
    """One-hot encode the categorical columns of ``data``.

    For every column listed in ``cls.CategoryCols`` and every row of
    ``data`` exactly one cell of the output row is set to 1:

    * ``d_feat['<col>:missing']`` when the value is null (``pd.isnull``),
    * ``d_feat['<col>:<value>']`` when that key exists in ``d_feat``,
    * ``d_feat['<col>:less']`` otherwise (value has no dedicated feature).

    Args:
        cls: object exposing ``CategoryCols``, an iterable of column names.
        data: pandas DataFrame containing those columns.
        d_feat: mapping from feature key (see above) to the output column
            index; its length fixes the width of the result.

    Returns:
        ``numpy`` array of shape ``(len(data), len(d_feat))`` and dtype
        ``int8``.

    Raises:
        KeyError: if a needed ``'<col>:missing'`` / ``'<col>:less'`` key is
            absent from ``d_feat`` or a column is absent from ``data``.
    """
    n = len(data)
    result = np.zeros((n, len(d_feat)), dtype='int8')
    if n == 0:
        # Nothing to encode; return the empty matrix without touching columns.
        return result

    # Per-column tally of how many rows fell into each branch
    # ('missing' / 'hit' / 'less'); used only by the sanity check below.
    d_stat = {}
    for col in cls.CategoryCols:
        # Rows are addressed by position. ``DataFrame.ix`` (used previously)
        # no longer exists in pandas >= 1.0 and, on integer indexes, looked
        # up labels rather than positions, which broke on non-default indexes.
        column = data[col]
        missing_key = '%s:missing' % col
        less_key = '%s:less' % col
        counts = d_stat.setdefault(col, {})
        for i in range(n):
            v = column.iloc[i]
            if pd.isnull(v):
                kind, key = 'missing', missing_key
            else:
                key = '%s:%s' % (col, v)
                if key in d_feat:
                    kind = 'hit'
                else:
                    kind, key = 'less', less_key
            # Looked up lazily so a missing fallback key only fails when a
            # row actually needs it.
            result[i, d_feat[key]] = 1
            counts[kind] = counts.get(kind, 0) + 1

    # Sanity check: every row must have been counted exactly once per column.
    for col, counts in d_stat.items():
        total = sum(counts.values())
        if total != n:
            print('Encoding for column %s error, %d : %d. ' % (col, total, n))
    return result
# (Web-page navigation residue: "Comment list" / "Table of contents")