tflearn_wide_and_deep.py 文件源码-python代码片段

def prepare_input_data(self, input_data, name="", category_map=None):
        '''
        Prepare input data dicts
        '''
        print ("-"*40 + " Preparing %s" % name)
        X = input_data[self.continuous_columns].values.astype(np.float32)
        Y = input_data[self.label_column].values.astype(np.float32)
        Y = Y.reshape([-1, 1])
        if self.verbose:
            print ("  Y shape=%s, X shape=%s" % (Y.shape, X.shape))

        X_dict = {"wide_X": X}

        if 'deep' in self.model_type:
            # map categorical value strings to integers
            td = input_data
            if category_map is None:
                category_map = {}
                for cc in self.categorical_columns:
                    if not cc in td.columns:
                        continue
                    cc_values = sorted(td[cc].unique())
                    cc_max = 1+len(cc_values)
                    cc_map = dict(zip(cc_values, range(1, cc_max))) # start from 1 to avoid 0:0 mapping (save 0 for missing)
                    if self.verbose:
                        print ("  category %s max=%s,  map=%s" % (cc, cc_max, cc_map))
                    category_map[cc] = cc_map

            td = td.replace(category_map)

            # bin ages (cuts off extreme values)
            age_bins = [ 0, 12, 18, 25, 30, 35, 40, 45, 50, 55, 60, 65, 80, 65535 ]
            td['age_binned'] = pd.cut(td['age'], age_bins, labels=False)
            td = td.replace({'age_binned': {np.nan: 0}})
            print ("  %d age bins: age bins = %s" % (len(age_bins), age_bins))

            X_dict.update({ ("%s_in" % cc): td[cc].values.astype(np.int32).reshape([-1, 1]) for cc in self.categorical_columns})

        Y_dict = {"Y": Y}
        if self.verbose:
            print ("-"*40)
        return X_dict, Y_dict, category_map