def concatenate_sort(out_filename, in_filenames, sort_cols, metrics=None):
    """Merge several molecule-counter files into one sorted output file.

    Every column named in MOLECULE_INFO_COLUMNS is read from each input,
    concatenated in the order of ``in_filenames``, reordered by a single
    lexicographic sort over ``sort_cols``, and appended to the output.

    Args:
        out_filename: Path handed to ``MoleculeCounter.open(..., mode='w')``.
        in_filenames: Iterable of paths opened with
            ``MoleculeCounter.open(..., 'r')``. Must contain at least one
            entry, because metrics and reference columns are taken from
            the first input.
        sort_cols: Column names to sort by; ``sort_cols[0]`` is the primary
            key, later entries break ties.
        metrics: Value passed to ``set_all_metrics`` on the output. When
            None, ``get_all_metrics()`` of the first input is used.

    Raises:
        IndexError: If ``in_filenames`` is empty.
    """
    in_mcs = []
    try:
        # Open inputs one at a time so that a failure part-way through
        # still lets the ``finally`` clause close the ones already opened.
        for filename in in_filenames:
            in_mcs.append(MoleculeCounter.open(filename, 'r'))
        out_mc = MoleculeCounter.open(out_filename, mode='w')

        first_mc = in_mcs[0]
        if metrics is None:
            metrics = first_mc.get_all_metrics()
        out_mc.set_all_metrics(metrics)

        # Reference columns are copied from the first input only.
        # ``items()`` (rather than ``iteritems()``) works on Python 2 and 3.
        for col, values in first_mc.ref_columns.items():
            out_mc.set_ref_column(col, values[:])

        # np.lexsort treats the LAST key as the primary one, so the keys are
        # supplied in reverse to make sort_cols[0] the most significant.
        sort_keys = [
            np.concatenate([mc.get_column(col) for mc in in_mcs])
            for col in reversed(sort_cols)
        ]
        sort_index = np.lexsort(sort_keys)

        # Apply the same permutation to every column so rows stay aligned.
        for col in MOLECULE_INFO_COLUMNS:
            merged = np.concatenate([mc.get_column(col) for mc in in_mcs])
            out_mc.add_many(col, merged[sort_index])
    finally:
        # Release the input handles on both the success and error paths.
        for mc in in_mcs:
            mc.close()

    # Inputs are closed before the output is written out, as before.
    out_mc.save()
评论列表
文章目录