Python open_dataset() Examples

testing.py (project: sympl, author: mcgibbon)
def load_dictionary(filename):
    dataset = xr.open_dataset(filename, engine='scipy')
    return dict(dataset.data_vars)
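
A minimal round-trip sketch of how this loader pairs with the writer side (the file name 'state.nc' and the variable name are hypothetical; the scipy backend must be installed): write a dict of DataArrays with to_netcdf, then read it back.

import xarray as xr

# Write a dict of DataArrays to a netCDF3 file via the scipy engine,
# then read it back with load_dictionary defined above.
data_vars = {'air_temperature': xr.DataArray([280.0, 281.5], dims=['x'])}
xr.Dataset(data_vars).to_netcdf('state.nc', engine='scipy')

state = load_dictionary('state.nc')
print(state['air_temperature'].values)  # [280.  281.5]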
netcdf.py (project: sympl, author: mcgibbon)
def load(self):
        """
        Load the state from the restart file.

        Returns
        -------
        state : dict
            The model state stored in the restart file.
        """
        dataset = xr.open_dataset(self._filename)
        state = {}
        for name, value in dataset.data_vars.items():
            state[name] = DataArray(value[0, :])  # remove time axis
        state['time'] = datetime64_to_datetime(dataset['time'][0])
        return state
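
A minimal sketch of the restart-file layout this loader expects ('restart.nc' and the variable name are hypothetical): every data variable carries a leading time axis of length one, which load() strips off with value[0, :].

import numpy as np
import xarray as xr

# One time step, ten vertical levels; load() drops the time axis and
# reads the timestamp back from the 'time' coordinate.
ds = xr.Dataset(
    {'air_temperature': (('time', 'mid_levels'), np.full((1, 10), 280.0))},
    coords={'time': [np.datetime64('2000-01-01')]})
ds.to_netcdf('restart.nc')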
test_fractional_cover.py (project: fc, author: GeoscienceAustralia)
def open_dataset(file_path):
    ds = xr.open_dataset(file_path, mask_and_scale=False, drop_variables='crs')
    ds.attrs['crs'] = datacube.utils.geometry.CRS('EPSG:32754')
    return ds
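
A self-contained sketch of what mask_and_scale=False changes ('packed.nc' is a scratch file): packed values keep their on-disk integer dtype instead of being unpacked to float via the CF scale_factor attribute.

import numpy as np
import xarray as xr

# Write a packed int16 variable carrying a CF scale_factor attribute.
packed = xr.Dataset({'bs': ('x', np.array([50, 120], dtype='int16'),
                            {'scale_factor': 0.01})})
packed.to_netcdf('packed.nc')

print(xr.open_dataset('packed.nc', mask_and_scale=False)['bs'].dtype)  # int16
print(xr.open_dataset('packed.nc')['bs'].dtype)                        # float, scaled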
io.py (project: PyPSA, author: PyPSA)
def __init__(self, path):
            self.path = path
            if isinstance(path, string_types):
                self.ds = xr.open_dataset(path)
data_set.py (project: auDeep, author: auDeep)
def load(path: Path) -> DataSet:
    """
    Loads a data set from the specified NetCDF4 file.

    Parameters
    ----------
    path: pathlib.Path
        Path to the file which should be loaded.

    Returns
    -------
    DataSet
        The data set loaded from the specified file.
    """
    log = logging.getLogger(__name__)
    log.info("loading data set from %s", path)

    data = xr.open_dataset(str(path))  # type: xr.Dataset

    # restore data types
    data[_DataVar.FILENAME] = data[_DataVar.FILENAME].astype(np.object).fillna(None)
    data[_DataVar.CHUNK_NR] = data[_DataVar.CHUNK_NR].astype(np.object).fillna(None)
    data[_DataVar.CV_FOLDS] = data[_DataVar.CV_FOLDS].astype(np.object).fillna(None)
    data[_DataVar.PARTITION] = data[_DataVar.PARTITION].astype(np.object).fillna(None)
    data[_DataVar.LABEL_NOMINAL] = data[_DataVar.LABEL_NOMINAL].astype(np.object).fillna(None)
    data[_DataVar.LABEL_NUMERIC] = data[_DataVar.LABEL_NUMERIC].astype(np.object)
    data[_DataVar.FEATURES] = data[_DataVar.FEATURES].astype(np.float32)

    return DataSet(data=data,
                   mutable=False)
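
The astype(np.object).fillna(None) pattern above exists because NetCDF has no null value for strings: missing entries come back as NaN, and only an object-dtype array can hold None alongside strings. A tiny self-contained sketch:

import numpy as np
import xarray as xr

arr = xr.DataArray(np.array(['a.wav', np.nan], dtype=object))
print(arr.astype(object).fillna(None).values)  # ['a.wav' None]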
netcdf.py (project: earthio, author: ContinuumIO)
def load_netcdf_array(datafile, meta, layer_specs=None):
    '''
    Load a NetCDF file into an xarray.Dataset

    Parameters:
        :datafile: str: Path on disk to NetCDF file
        :meta: dict: netcdf metadata object
        :layer_specs: dict<str:str> or list<str>: layers/variables to load;
            falls back to meta['variables'] when not given

    Returns:
        :new_es: xr.Dataset
    '''
    logger.debug('load_netcdf_array: {}'.format(datafile))
    ds = xr.open_dataset(datafile)
    if layer_specs:
        data = []
        if isinstance(layer_specs, dict):
            data = { k: ds[getattr(v, 'name', v)] for k, v in layer_specs.items() }
            layer_spec = tuple(layer_specs.values())[0]
        if isinstance(layer_specs, (list, tuple)):
            data = {getattr(v, 'name', v): ds[getattr(v, 'name', v)]
                    for v in layer_specs }
            layer_spec = layer_specs[0]
        data = OrderedDict(data)
    else:
        data = OrderedDict([(v, ds[v]) for v in meta['variables']])
        layer_spec = None
    geo_transform = take_geo_transform_from_meta(layer_spec=layer_spec,
                                                 required=True,
                                                 **meta)
    for b, sub_dataset_name in zip(meta['layer_meta'], data):
        b['geo_transform'] = meta['geo_transform'] = geo_transform
        b['sub_dataset_name'] = sub_dataset_name
    new_es = xr.Dataset(data,
                        coords=_normalize_coords(ds),
                        attrs=meta)
    return new_es
test_modelgrids.py (project: oocgcm, author: lesommer)
def __call__(self, filename=None, varname=None):
    if self.array_type == 'numpy':
        out = Dataset(filename).variables[varname][:].squeeze()
    elif self.array_type == 'xarray':
        ds = xr.open_dataset(filename, chunks=self.chunks, lock=False)
        out = ds[varname]
    elif self.array_type == 'dask_from_numpy':
        d = Dataset(filename).variables[varname][:].squeeze()
        out = da.from_array(np.array(d), chunks=self.chunks)
    elif self.array_type == 'dask_from_netcdf':
        d = Dataset(filename).variables[varname]
        out = da.from_array(d, chunks=self.chunks)
    return out
io.py (project: oocgcm, author: lesommer)
def return_xarray_dataset(filename,chunks=None,**kwargs):
    """Return an xarray dataset corresponding to filename.

    Parameters
    ----------
    filename : str
        path to the netcdf file from which to create a xarray dataset
    chunks : dict-like
        dictionary of chunk sizes used to create the xarray.Dataset.

    Returns
    -------
    ds : xarray.Dataset
    """
    return xr.open_dataset(filename, chunks=chunks, **kwargs)
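
Hypothetical usage (the file name, variable name, and chunk sizes are made up): when chunks is given, the returned dataset is dask-backed and no data is read until a computation asks for it.

ds = return_xarray_dataset('ocean.nc', chunks={'time': 12, 'y': 200, 'x': 200})
sst_mean = ds['sst'].mean('time')  # builds a lazy dask graph
result = sst_mean.compute()        # the file is only read here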
utils.py (project: xcesm, author: Yefee)
def open_data(self, **kwargs):

        data = self.get_path()
        if self.iTRACE_flag:
            ico = xr.open_mfdataset(data['ico'], **kwargs).sortby('time')
            ice = xr.open_mfdataset(data['ice'], **kwargs).sortby('time')
            igo = xr.open_mfdataset(data['igo'], **kwargs).sortby('time')
            igom = xr.open_mfdataset(data['igom'], **kwargs).sortby('time')
            return ice, ico, igo, igom
        else:
            if len(data) > 1:
                return xr.open_mfdataset(data, **kwargs).sortby('time')
            else:
                return xr.open_dataset(data[0], **kwargs).sortby('time')
read_netcdf.py (project: pycovjson, author: Reading-eScience-Centre)
def __init__(self, dataset_path):
        self.dataset_path = dataset_path
        try:
            self.dataset = xr.open_dataset(self.dataset_path)

        except OSError:
            print('File not found.')
            exit()
read_netcdf.py (project: pycovjson, author: Reading-eScience-Centre)
def read(self, file_path):
        self.file_path = file_path
        self.dataset = xr.open_dataset(self.file_path)
        self.var_names = self.get_var_names(self.dataset)
read_netcdf.py (project: pycovjson, author: Reading-eScience-Centre)
def get_xarray(self):
        self.dataset = xr.open_dataset(self.dataset_path)
        return self.dataset
data.py (project: psyplot, author: Chilipp)
def open_mfdataset(paths, decode_cf=True, decode_times=True,
                   decode_coords=True, engine=None, gridfile=None,
                   t_format=None, **kwargs):
    """
    Open multiple files as a single dataset.

    This function is essentially the same as the :func:`xarray.open_mfdataset`
    function but (as the :func:`open_dataset`) supports additional decoding
    and the ``'gdal'`` engine.
    You can further specify the `t_format` parameter to get the time
    information from the files and use the results to concatenate the files.

    Parameters
    ----------
    %(xarray.open_mfdataset.parameters.no_engine)s
    %(open_dataset.parameters.engine)s
    %(get_tdata.parameters.t_format)s
    %(CFDecoder.decode_coords.parameters.gridfile)s

    Returns
    -------
    xarray.Dataset
        The dataset that contains the variables from `paths`"""
    if t_format is not None or engine == 'gdal':
        if isinstance(paths, six.string_types):
            paths = sorted(glob(paths))
        if not paths:
            raise IOError('no files to open')
    if t_format is not None:
        time, paths = get_tdata(t_format, paths)
        kwargs['concat_dim'] = time
    if engine == 'gdal':
        from psyplot.gdal_store import GdalStore
        paths = list(map(GdalStore, paths))
        engine = None
        kwargs['lock'] = False

    ds = xr.open_mfdataset(
        paths, decode_cf=decode_cf, decode_times=decode_times, engine=engine,
        decode_coords=False, **kwargs)
    if decode_cf:
        return CFDecoder.decode_ds(ds, gridfile=gridfile, inplace=True,
                                   decode_coords=decode_coords,
                                   decode_times=decode_times)
    return ds
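
A generic sketch of the t_format code path (the file pattern and name parsing are hypothetical; in psyplot, get_tdata builds the time index from t_format): passing a pandas index as concat_dim tells xarray how to stack the files.

from glob import glob

import pandas as pd
import xarray as xr

paths = sorted(glob('data_*.nc'))  # e.g. data_20000101.nc, data_20000201.nc
# Parse the date out of each file name (assumes no directory prefix).
time = pd.to_datetime([p[5:13] for p in paths], format='%Y%m%d').rename('time')
ds = xr.open_mfdataset(paths, combine='nested', concat_dim=time)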
nldas_soil_moisture_ml.py (project: elm, author: ContinuumIO)
def get_nldas_fora_X_and_vic_y(year, month, day, hour,
                               vic_or_fora, band_order=None,
                               prefix=None, data_arrs=None,
                               keep_columns=None):
    '''Load data from VIC or NLDAS Forcing A Grib files

    Parameters:
        year: year of forecast time
        month: month of forecast time
        day: day of forecast time
        hour: hour of forecast time
        vic_or_fora: string indicating which NLDAS data source
        band_order: list of DataArray names already loaded
        prefix: add a prefix to the DataArray name from Grib
        data_arrs: Add the DataArrays to an existing dict
        keep_columns: Retain only the DataArrays in this list, if given
    Returns:
        tuple of (data_arrs, band_order), where data_arrs is
        an OrderedDict of DataArrays and band_order is their
        order when they are flattened from rasters to a single
        2-D matrix
    '''
    data_arrs = data_arrs or OrderedDict()
    band_order = band_order or []
    path = get_file(year, month, day, hour, dset=vic_or_fora)
    dset = xr.open_dataset(path, engine='pynio')
    for k in dset.data_vars:
        if keep_columns and k not in keep_columns:
            continue
        arr = getattr(dset, k)
        if sorted(arr.dims) != ['lat_110', 'lon_110']:
            continue
        #print('Model: ',f, 'Param:', k, 'Detail:', arr.long_name)
        lon, lat = arr.lon_110, arr.lat_110
        geo_transform = [lon.Lo1, lon.Di, 0.0,
                         lat.La1, 0.0, lat.Dj]
        shp = arr.shape
        canvas = Canvas(geo_transform, shp[1], shp[0], arr.dims)
        arr.attrs['canvas'] = canvas
        if prefix:
            band_name = '{}_{}'.format(prefix, k)
        else:
            band_name = k
        data_arrs[band_name] = arr
        band_order.append(band_name)
    return data_arrs, band_order
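
Note that engine='pynio' requires the PyNIO backend, which is what lets xarray read these Grib files; the Lo1/Di/La1/Dj attributes used above are the grid descriptors PyNIO exposes on the coordinate variables. A hypothetical direct call for one Forcing-A file:

import xarray as xr

dset = xr.open_dataset('NLDAS_FORA0125_H.A20000101.0000.002.grb',
                       engine='pynio')  # hypothetical path; requires PyNIO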
gard_utils.py (project: storylines, author: jhamman)
def get_filelist(pattern, date_range=None, timevar='time', calendar=None):
    '''Given a glob pattern, return the list of files whose times fall within date_range'''
    files = glob.glob(pattern)

    if date_range is not None:
        date_range = pd.to_datetime(list(date_range)).values
        sublist = []
        for f in files:
            try:
                kwargs = dict(mask_and_scale=False, concat_characters=False,
                              decode_coords=False)
                if calendar:
                    ds = xr.open_dataset(f, decode_cf=False,
                                         decode_times=False, **kwargs)

                    # Force the given calendar onto whichever time
                    # variable is missing one.
                    if ('XTIME' in ds and
                            'calendar' not in ds['XTIME'].attrs):
                        ds['XTIME'].attrs['calendar'] = calendar

                    elif 'calendar' not in ds[timevar].attrs:
                        ds[timevar].attrs['calendar'] = calendar
                    # else: decode using the calendar attribute in the file

                    ds = xr.decode_cf(ds, decode_times=True, **kwargs)
                else:
                    ds = xr.open_dataset(f, decode_cf=True, decode_times=True,
                                         **kwargs)
            except Exception as e:
                warnings.warn('failed to open {}: {}'.format(f, e))
                continue

            try:
                ds[timevar] = ds['XTIME']
            except KeyError:
                pass

            if CHECK_TIMEVARS:
                try:
                    check_times(ds[timevar].values, f=f)
                except ValueError as e:
                    warnings.warn(
                        'time check raised an error for file %s: %s' % (f, e))

            start = ds[timevar].values[0]
            end = ds[timevar].values[-1]
            ds.close()
            if (((start >= date_range[0]) and (start <= date_range[1])) or
                    ((end >= date_range[0]) and (end <= date_range[1])) or
                    ((start <= date_range[0]) and (end >= date_range[1]))):
                sublist.append(f)
        files = sublist
    files.sort()
    return files
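
Hypothetical usage (the path, dates, and calendar are made up): restrict a WRF output glob to the files whose time ranges overlap a decade, forcing a 'noleap' calendar onto files that omit one.

files = get_filelist('/data/wrfout_d01_*.nc',
                     date_range=('1990-01-01', '1999-12-31'),
                     calendar='noleap')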
idw.py (project: climate, author: FIDS-UWO)
def load_data(file, varname, extent=None, period=None, **kwargs):
    """
    Loads netCDF files and extracts data given a spatial extend and time period
    of interest.
    """
    # Open either single or multi-file data set depending if list of wildcard
    if "*" in file or isinstance(file, list):
        ds = xr.open_mfdataset(file, decode_times=False)
    else:
        ds = xr.open_dataset(file, decode_times=False)

    # Construct condition based on spatial extents
    if extent:
        n, e, s, w = extent
        ds = ds.sel(lat=(ds.lat >= s) & (ds.lat <= n))
        # Account for extent crossing Greenwich
        if w > e:
            ds = ds.sel(lon=(ds.lon >= w) | (ds.lon <= e))
        else:
            ds = ds.sel(lon=(ds.lon >= w) & (ds.lon <= e))

    # Construct condition based on the time period
    if period:
        t1 = date2num(datetime(*period[0]), ds.time.units, ds.time.calendar)
        t2 = date2num(datetime(*period[1]), ds.time.units, ds.time.calendar)
        ds = ds.sel(time=(ds.time >= t1) & (ds.time <= t2))

    # Extra keyword arguments to select from additional dimensions (e.g. plev)
    if kwargs:
        ds = ds.sel(**kwargs)

    # Load in the data to a numpy array
    dates = num2date(ds.time, ds.time.units, ds.time.calendar)
    arr = ds[varname].values
    lat = ds.lat.values
    lon = ds.lon.values

    # Convert pr units from kg m-2 s-1 to mm/day
    if ds[varname].units == 'kg m-2 s-1':
        arr *= 86400
    # Convert tas units from K to degC
    elif ds[varname].units == 'K':
        arr -= 273.15

    return arr, lat, lon, dates
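
Hypothetical usage: monthly precipitation over an Australian domain for 1981-2010. extent is ordered (north, east, south, west), period takes datetime constructor tuples, and the function assumes numeric times plus units/calendar attributes in the file (num2date/date2num here are the netCDF4 helpers).

arr, lat, lon, dates = load_data('pr_Amon_*.nc', 'pr',
                                 extent=(-10, 155, -45, 110),
                                 period=((1981, 1, 1), (2010, 12, 31)))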
io.py (project: experiment, author: darothen)
def load_variable(var_name, path_to_file, squeeze=False,
                  fix_times=True, **extr_kwargs):
    """ Interface for loading an extracted variable into memory, using
    either iris or xarray. If `path_to_file` is instead a raw dataset,
    then the entire contents of the file will be loaded!

    Parameters
    ----------
    var_name : string
        The name of the variable to load
    path_to_file : string
        Location of file containing variable
    squeeze : bool
        Load only the requested field (ignore all others) and
        associated dims
    fix_times : bool
        Correct the timestamps to the middle of the bounds
        in the variable metadata (CESM puts them at the right
        boundary which sucks!)
    extr_kwargs : dict
        Additional keyword arguments to pass to the extractor

    """

    logger.info("Loading %s from %s" % (var_name, path_to_file))

    ds = xr.open_dataset(path_to_file, decode_cf=False, **extr_kwargs)

    # TODO: Revise this logic as part of generalizing time post-processing.
    # Fix time unit, if necessary
    # interval, timestamp = ds.time.units.split(" since ")
    # timestamp = timestamp.split(" ")
    # yr, mm, dy = timestamp[0].split("-")
    #
    # if int(yr) < 1650:
    #     yr = 2001
    # yr = str(yr)
    #
    # # Re-construct at Jan 01, 2001 and re-set
    # timestamp[0] = "-".join([yr, mm, dy])
    # new_units = " ".join([interval, "since"] + timestamp)
    # ds.time.attrs['units'] = new_units

    # TODO: Generalize time post-processing.
    # if fix_times:
    #     assert hasattr(ds, 'time_bnds')
    #     bnds = ds.time_bnds.values
    #     mean_times = np.mean(bnds, axis=1)
    #
    #     ds.time.values = mean_times

    # Be pedantic and check that we don't have a "missing_value" attr
    for field in ds:
        if hasattr(ds[field], 'missing_value'):
            del ds[field].attrs['missing_value']

    # Lazy decode CF
    # TODO: There's potentially a bug where decode_cf eagerly loads dask arrays
    # ds = xr.decode_cf(ds)

    return ds

