def quantize_data(x, y, wc=None, continuous_rate=0.1, separate=False):
    """Encode categorical feature values and labels as integer codes.

    Each feature (column of ``x``) flagged as categorical has its distinct
    values replaced by integer codes; features flagged as continuous are
    passed through unchanged. Labels in ``y`` are always encoded.

    Args:
        x: Samples, either a list of rows or a 2-D array exposing ``.T``.
        y: Labels, one per sample.
        wc: Optional per-feature mask; a truthy entry marks the feature as
            continuous. It is coerced to a boolean array. When ``None`` the
            mask is inferred: a feature is treated as continuous when its
            number of distinct values is at least
            ``int(continuous_rate * len(y))``.
        continuous_rate: Ratio used by the inference rule above.
        separate: When true, ``x`` is returned as a tuple
            ``(categorical_part, continuous_part)`` instead of one array.

    Returns:
        A tuple ``(x, y, wc, features, feat_dics, label_dic)`` where

        * ``x`` is the encoded data. With ``separate=False`` it is a single
          array (integer dtype if every feature is categorical, otherwise
          ``np.double``). With ``separate=True`` it is a tuple of the
          categorical columns (integer dtype) and the continuous columns
          (``np.double``).
        * ``y`` is an ``np.int8`` array of label codes.
        * ``wc`` is the boolean "is continuous" mask actually used.
        * ``features`` is a list with the set of distinct values per feature.
        * ``feat_dics`` is a list holding, per feature, the value -> code
          dict, or ``None`` for continuous features.
        * ``label_dic`` maps label code -> original label.

    Notes:
        Codes are assigned by enumerating a ``set``, so which code a given
        value receives follows the set's iteration order and is not
        guaranteed to be sorted or stable for every value type.
        Label codes are stored as ``np.int8``; more than 128 distinct labels
        do not fit in that dtype.
    """
    # Distinct values observed in each feature column.
    columns = zip(*x) if isinstance(x, list) else x.T
    features = [set(column) for column in columns]

    if wc is None:
        threshold = int(continuous_rate * len(y))
        # dtype=bool keeps `~wc` valid even when there are no features.
        wc = np.array([len(values) >= threshold for values in features],
                      dtype=bool)
    else:
        # Coerce so that `~wc` is a logical NOT and `x[:, wc]` is a mask;
        # an integer array would turn both into bitwise/fancy operations.
        wc = np.asarray(wc, dtype=bool)
    is_continuous = wc.tolist()

    feat_dics = [
        None if is_continuous[idx]
        else {value: code for code, value in enumerate(values)}
        for idx, values in enumerate(features)
    ]

    # Replace categorical values by their codes, keep continuous ones as-is.
    encoded = [
        [value if is_continuous[idx] else feat_dics[idx][value]
         for idx, value in enumerate(sample)]
        for sample in x
    ]

    if separate:
        data = np.array(encoded, dtype=np.double)
        # `np.int` was removed from NumPy; the builtin `int` is what it aliased.
        x = (data[:, ~wc].astype(int), data[:, wc])
    else:
        dtype = np.double if any(is_continuous) else int
        x = np.array(encoded, dtype=dtype)

    label_codes = {label: code for code, label in enumerate(set(y))}
    y = np.array([label_codes[label] for label in y], dtype=np.int8)
    # Invert so callers can map predicted codes back to the original labels.
    label_dic = {code: label for label, code in label_codes.items()}
    return x, y, wc, features, feat_dics, label_dic
评论列表
文章目录