batch.py 文件源码

python
阅读 32 收藏 0 点赞 0 评论 0

项目:dataset 作者: analysiscenter 项目源码 文件源码
def _load_table(self, src, fmt, components=None, *args, **kwargs):
        """ Load a data frame from table formats: csv, hdf5, feather """
        if fmt == 'csv':
            _data = pd.read_csv(src, *args, **kwargs)
        elif fmt == 'feather':
            _data = feather.read_dataframe(src, *args, **kwargs)  # pylint: disable=redefined-variable-type
        elif fmt == 'hdf5':
            _data = pd.read_hdf(src, *args, **kwargs)         # pylint: disable=redefined-variable-type

        # Put into this batch only part of it (defined by index)
        if isinstance(_data, pd.DataFrame):
            _data = _data.loc[self.indices]
        elif isinstance(_data, dd.DataFrame):
            # dask.DataFrame.loc supports advanced indexing only with lists
            _data = _data.loc[list(self.indices)].compute()

        components = tuple(components or self.components)
        for i, comp in enumerate(components):
            setattr(self, comp, _data.iloc[:, i].values)
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号