def write_metadata(hdf5_out, dim, metadata_df, convert_back_to_neg_666):
    """
    Writes either column or row metadata to proper node of gctx out (hdf5) file.

    One group is created for the requested dimension; the DataFrame index is
    written to its "id" dataset and every column except "ind" is written to a
    dataset named after that column.

    Input:
        - hdf5_out (h5py): open hdf5 file to write to
        - dim (str; must be "row" or "col"): dimension of metadata to write to
        - metadata_df (pandas DataFrame): metadata DataFrame to write to file;
            it is not modified by this function
        - convert_back_to_neg_666 (bool): Whether to convert numpy.nans back to "-666",
            as per CMap metadata null convention

    Raises:
        ValueError: if dim is neither "row" nor "col"
    """
    if dim == "col":
        metadata_node_name = col_meta_group_node
    elif dim == "row":
        metadata_node_name = row_meta_group_node
    else:
        # Fail here with a clear message instead of continuing with an
        # unbound node name and crashing on the first dataset write.
        msg = "'dim' argument must be either 'row' or 'col'!"
        logger.error(msg)
        raise ValueError(msg)

    hdf5_out.create_group(metadata_node_name)

    # write id field to expected node
    hdf5_out.create_dataset(metadata_node_name + "/id",
                            data=[str(x) for x in metadata_df.index])

    # if specified, convert numpy.nans in metadata back to -666.
    # replace() returns a new frame, so rebinding the local name leaves the
    # caller's DataFrame untouched.
    if convert_back_to_neg_666:
        metadata_df = metadata_df.replace([numpy.nan], ["-666"])

    # write metadata columns to their own arrays; the "ind" column is skipped
    for field in metadata_df.columns:
        if field == "ind":
            continue
        hdf5_out.create_dataset(metadata_node_name + "/" + field,
                                data=numpy.array(list(metadata_df.loc[:, field])))
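# (web-page navigation residue, not code) Comment list
# (web-page navigation residue, not code) Table of contents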