def read_data_frame_indexed_no_concat(fn, tabix_queries, query_cols=None, coords=True):
    ''' Read rows from the HDF5 data frame that match each tabix query in the
    queries list, keeping the result of every query separate.

    Args:
        fn: Path of the HDF5 file; it is opened read-only.
        tabix_queries: Iterable of tabix queries. A tabix query is in the
            form ('chr1', 100, 200).
        query_cols: List of columns you want to return. Defaults to an empty
            list (a fresh one per call) when omitted or None.
        coords: If True, coordinates are returned regardless of query_cols.
            If False, only the columns specified in query_cols are returned.

    Returns:
        A list with one entry per query, in the order the queries were
        given. Each entry is whatever _read_data_frame_indexed_sub returns
        for that query (a pandas DataFrame, per the original documentation
        of this function). The per-query results are NOT concatenated.
    '''
    # Avoid a mutable default argument: a shared [] default would leak any
    # modification made by a callee into later calls.
    if query_cols is None:
        query_cols = []

    # The context manager guarantees the file is closed even when reading
    # the index or one of the queries raises.
    with h5py.File(fn, 'r') as f:
        # Read the index once and reuse it for every query.
        tabix_index = read_tabix_index(f)
        dfs = [
            _read_data_frame_indexed_sub(
                f, tabix_index, q, query_cols=query_cols, coords=coords
            )
            for q in tabix_queries
        ]

    # Return the per-query results as a list (no merging is performed).
    return dfs
评论列表
文章目录