def set_dataconf_for_checktype(self, df, node_id, data_dfconf_list):
    """Derive per-column type and unique-value configuration from ``df``.

    Each column of ``df`` is classified by its dtype name: ``'CONTINUOUS'``
    when the name is one of the numeric names listed in the body, otherwise
    ``'CATEGORICAL'``. For categorical columns the distinct values of the
    column (missing values replaced by ``''``) are collected; for continuous
    columns the collected list is empty. The collected values are combined
    with any previously stored ``column_u_values`` for that column through
    ``utils.get_combine_label_list``.

    :param df: pandas DataFrame whose columns are inspected.
    :param node_id: not used by this method; kept for interface
        compatibility.
    :param data_dfconf_list: when non-empty, passed to ``wf_data_conf`` and
        the resulting object's ``cell_feature_unique`` attribute (if any) is
        used as the previously stored unique values. When empty, no previous
        values are assumed.
    :return: a tuple ``(data_conf, data_conf_unique)`` of JSON-round-tripped
        dicts shaped as
        ``{'cell_feature': {col: {'column_type': ...}}}`` and
        ``{'cell_feature_unique': {col: {'column_u_values': [...]}}}``.
        If any exception is raised, it is logged and ``None`` is returned
        implicitly.
    """
    try:
        # TODO: tidy up the parameters of set_default_dataconf_from_csv.
        # Only these dtype names count as continuous; everything else
        # (including unsigned/8-bit ints and bool) is treated as categorical.
        numerics = ('int16', 'int32', 'int64', 'float16', 'float32', 'float64')

        # Default to "no previously stored unique values" so the loop below
        # also works when data_dfconf_list is empty (the name used to be
        # unbound in that case and every call failed with NameError).
        _cell_feature_unique = {}
        if len(data_dfconf_list) > 0:
            _wf_data_conf = wf_data_conf(data_dfconf_list)
            # Reuse previously stored unique values when the config has them.
            _cell_feature_unique = getattr(_wf_data_conf, 'cell_feature_unique', list())

        data_conf_col_type = dict()
        data_conf_col_unique_v = dict()
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for col_name, col_dtype in df.dtypes.items():
            if str(col_dtype) in numerics:  # maybe need float
                col_type = 'CONTINUOUS'
                columns_unique_value = list()
            else:
                col_type = 'CATEGORICAL'
                # Missing values are replaced with '' before collecting
                # the distinct values.
                columns_unique_value = pd.unique(df[col_name].fillna('').values.ravel()).tolist()

            if col_name in _cell_feature_unique:
                origin_feature_unique = _cell_feature_unique[col_name].get('column_u_values')
            else:
                origin_feature_unique = list()

            # Combine the previously stored values with the ones found now.
            combined_col_u_list = utils.get_combine_label_list(origin_feature_unique, columns_unique_value)

            data_conf_col_type[col_name] = {'column_type': col_type}
            data_conf_col_unique_v[col_name] = {'column_u_values': combined_col_u_list}

        data_conf = {'cell_feature': data_conf_col_type}
        data_conf_unique_v = {'cell_feature_unique': data_conf_col_unique_v}

        # Round-trip through JSON so the results contain only JSON-native
        # types (and fail here, not later, if something is not serializable).
        data_conf_json = json.loads(json.dumps(data_conf))
        data_conf_unique_json = json.loads(json.dumps(data_conf_unique_v))
        return data_conf_json, data_conf_unique_json
    except Exception as e:
        # Existing contract: log the failure and fall through (returns None).
        logging.error("set_dataconf_for_checktype {0} {1}".format(e, e.__traceback__.tb_lineno))
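# Web-page residue, not code: "评论列表" (comment list)
# Web-page residue, not code: "文章目录" (article table of contents)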