hdf5.py 文件源码-python代码片段

hdf5.py 文件源码

python

阅读 28 收藏 0 点赞 0 评论 0

项目：cellranger 作者：10XGenomics 项目源码 | 文件源码

def read_data_frame(fn, query_cols=None):
    """Load a pandas DataFrame from an HDF5 file.

    If a column list is specified, only load the matching columns.

    Args:
        fn: Path of the HDF5 file; it is opened read-only with ``h5py.File``.
        query_cols: Optional list of column names used to restrict which
            columns are loaded. ``None`` (the default) is treated as an
            empty list. The actual selection is delegated to
            ``get_column_intersection``.

    Returns:
        A DataFrame with one column per selected name. Datasets for which
        ``has_levels`` is true become Categorical columns built from the
        stored indices and the levels returned by ``get_levels``; every
        other dataset is loaded as a plain Series.
    """
    # Use a fresh list per call instead of a shared mutable default
    # argument, so nothing done to the list downstream can leak between
    # calls.
    if query_cols is None:
        query_cols = []

    with h5py.File(fn, 'r') as f:

        # The file-level "column_names" attribute lists the stored columns.
        column_names = f.attrs.get("column_names")
        column_names = get_column_intersection(column_names, query_cols)

        df = p.DataFrame()

        # Add the columns progressively to save memory
        for name in column_names:
            ds = f[name]
            if has_levels(ds):
                indices = ds[:]
                uniques = get_levels(ds)
                # This method of constructing of Categorical avoids copying the indices array
                # which saves memory for big datasets
                # NOTE(review): newer pandas releases deprecate the `fastpath`
                # keyword in favour of Categorical.from_codes -- confirm
                # against the pinned pandas version before switching.
                df[name] = p.Categorical(indices, categories=uniques, ordered=False, fastpath=True)
            else:
                df[name] = p.Series(ds[:])

        return df