def prepare_input_data(self, input_data, name="", category_map=None):
    '''
    Prepare the input (X) and label (Y) dicts for one data split.

    The "wide" input is the float32 matrix of self.continuous_columns.
    When 'deep' is part of self.model_type, every column listed in
    self.categorical_columns is additionally exposed as an int32 column
    vector under the key "<column>_in".

    Arguments:
      input_data   -- pandas DataFrame holding self.continuous_columns,
                      self.label_column and, for 'deep' models, an 'age'
                      column plus the categorical columns. It is not
                      modified; encoding happens on a copy.
      name         -- label printed in the banner line.
      category_map -- optional {column: {raw value: integer code}}. When
                      None (and the model is 'deep') it is built from
                      input_data with codes starting at 1. Pass the map
                      returned for one split to encode another split the
                      same way.

    Returns:
      (X_dict, Y_dict, category_map). category_map is returned unchanged
      when supplied, and stays None for models without a 'deep' part.

    Code 0 is reserved for "missing": NaN/None entries and values that do
    not appear in category_map are encoded as 0.
    '''
    print("-"*40 + " Preparing %s" % name)
    X = input_data[self.continuous_columns].values.astype(np.float32)
    Y = input_data[self.label_column].values.astype(np.float32)
    Y = Y.reshape([-1, 1])  # column vector, one label per row
    if self.verbose:
        print(" Y shape=%s, X shape=%s" % (Y.shape, X.shape))

    X_dict = {"wide_X": X}

    if 'deep' in self.model_type:
        # Work on a copy so the caller's frame never gains or loses columns.
        td = input_data.copy()

        if category_map is None:
            category_map = {}
            for cc in self.categorical_columns:
                # Derived columns (e.g. 'age_binned') do not exist yet.
                if cc not in td.columns:
                    continue
                # Drop NaN/None first: they cannot be sorted against strings
                # and are represented by the reserved code 0 anyway.
                cc_values = sorted(td[cc].dropna().unique())
                cc_max = 1 + len(cc_values)
                # start from 1 to avoid 0:0 mapping (save 0 for missing)
                cc_map = dict(zip(cc_values, range(1, cc_max)))
                if self.verbose:
                    print(" category %s max=%s, map=%s" % (cc, cc_max, cc_map))
                category_map[cc] = cc_map

        # Map categorical values to integers. Series.map yields NaN for
        # anything absent from the map (unseen or missing values), which is
        # then folded into the reserved code 0 instead of surviving as a raw
        # value that would break the int32 conversion below.
        for cc, cc_map in category_map.items():
            if cc in td.columns:
                td[cc] = td[cc].map(cc_map).fillna(0).astype(np.int32)

        # bin ages (cuts off extreme values); ages that fall outside every
        # bin come back as NaN from pd.cut and are stored as 0
        age_bins = [0, 12, 18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 80, 65535]
        td['age_binned'] = pd.cut(td['age'], age_bins, labels=False).fillna(0)
        # NOTE: the count printed here is the number of bin edges.
        print(" %d age bins: age bins = %s" % (len(age_bins), age_bins))

        X_dict.update({
            ("%s_in" % cc): td[cc].values.astype(np.int32).reshape([-1, 1])
            for cc in self.categorical_columns
        })

    Y_dict = {"Y": Y}
    if self.verbose:
        print("-"*40)
    return X_dict, Y_dict, category_map
评论列表
文章目录