def apply_quantile_mapping_by_month(input_data, data_to_match, **kwargs):
    '''Apply quantile mapping separately for each month of the year.'''
    obs_groups = data_to_match.groupby('time.month')
    input_groups = input_data.groupby('time.month')

    results = []
    for (key_obs, group_obs), (key_input, group_input) in zip(obs_groups,
                                                              input_groups):
        results.append(quantile_mapping(group_input, group_obs, **kwargs))

    # put the monthly groups back together
    new_concat = xr.concat(results, dim='time')

    # now sort the time dimension again
    sort_inds = np.argsort(new_concat.time.values)
    new_concat = new_concat.isel(time=sort_inds)

    return new_concat
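A minimal usage sketch for `apply_quantile_mapping_by_month`, assuming `quantile_mapping`, `xr` (xarray) and `np` (numpy) are available as in the snippet above; the daily series here are synthetic and purely illustrative:

import numpy as np
import pandas as pd
import xarray as xr

times = pd.date_range('2000-01-01', '2000-12-31', freq='D')
modeled = xr.DataArray(np.random.gamma(2.0, 1.0, len(times)),
                       dims='time', coords={'time': times})
observed = xr.DataArray(np.random.gamma(2.0, 1.5, len(times)),
                        dims='time', coords={'time': times})

corrected = apply_quantile_mapping_by_month(modeled, observed)
# Grouping by month shuffles the order; the argsort/isel step restores it,
# so the corrected series keeps the original daily time axis.
assert (corrected.time.values == modeled.time.values).all()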
def load_data(sub_tile_slice, sources):
    """
    Load a masked chunk of data from the datacube, based on a specification and list of datasets in `sources`.

    :param sub_tile_slice: A portion of a tile, tuple coordinates
    :param sources: a dictionary containing `data`, `spec` and `masks`
    :return: :class:`xarray.Dataset` containing loaded data. Will be indexed and sorted by time.
    """
    datasets = [load_masked_data(sub_tile_slice, source_prod)
                for source_prod in sources]  # list of datasets
    datasets = _remove_emptys(datasets)
    if len(datasets) == 0:
        raise EmptyChunkException()

    # TODO: Add check for compatible data variable attributes
    # flags_definition between pq products is different and is silently dropped
    datasets = xarray.concat(datasets, dim='time')  # Copies all the data
    if len(datasets.time) == 0:
        raise EmptyChunkException()

    # sort along the time dimension
    return datasets.isel(time=datasets.time.argsort())  # Copies all the data again
def concat(objs, dim=None, **kwargs):
    # Pin the xref/yref coordinates of every object to those of the first one,
    # so that xr.concat does not treat small coordinate differences as data to
    # be stacked along the new dimension.
    xref = objs[0].xref.values
    yref = objs[0].yref.values
    for obj in objs:
        obj.coords.update({'xref': xref, 'yref': yref})
    return xr.concat(objs, dim=dim, **kwargs)
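Without that coordinate alignment, `xr.concat` (with its default `coords='different'` setting) stacks differing scalar coordinates along the new dimension instead of keeping them scalar. A small sketch of the effect, using made-up coordinate values:

import numpy as np
import xarray as xr

a = xr.DataArray(np.zeros(3), dims='x', coords={'xref': 1.0, 'yref': 2.0})
b = xr.DataArray(np.ones(3), dims='x', coords={'xref': 1.1, 'yref': 2.0})

plain = xr.concat([a, b], dim='case')
print(plain.xref.values)      # [1.  1.1] -- xref became a per-case coordinate

aligned = concat([a, b], dim='case')
print(aligned.xref.values)    # 1.0 -- still a single scalar reference coordinate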
def concat_results(result_list, coord):
    """Concatenate several results from :py:meth:`smrt.core.model.Model.run` (of type :py:class:`Result`)
    into a single result (of type :py:class:`Result`). This extends the number of dimensions in the xarray
    held by the instance. The new dimension is specified with `coord`.

    :param result_list: list of results returned by :py:meth:`smrt.core.model.Model.run` or other functions.
    :param coord: a tuple (dimension_name, dimension_values) for the new dimension. dimension_values must be
        a sequence or array with the same length as result_list.

    :returns: :py:class:`Result` instance
    """
    dim_name, dim_value = coord

    return Result(xr.concat([result.data for result in result_list],
                            pd.Index(dim_value, name=dim_name)))
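`concat_results` relies on an xarray idiom: passing a `pandas.Index` as the `dim` argument of `xr.concat` creates a brand-new dimension named after the index, with the index values as its coordinate. A standalone sketch of that idiom (the dimension name and values below are illustrative, not part of SMRT):

import numpy as np
import pandas as pd
import xarray as xr

runs = [xr.DataArray(np.random.rand(4), dims='frequency') for _ in range(3)]

combined = xr.concat(runs, pd.Index([10, 20, 30], name='snow_depth'))
print(combined.dims)               # ('snow_depth', 'frequency')
print(combined.snow_depth.values)  # [10 20 30]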
def xarray_unravel_levels(arr, names, my_convert=lambda x: x):
    '''Recursively convert a nested dict of dicts of DataFrames into a single DataArray.

    names : names of the nesting levels; you don't have to include the last two
        (the DataFrame columns & rows), but you can, to override them.

    Requires xarray.
    '''
    import xarray
    if type(arr) == pd.DataFrame:
        return xarray.DataArray(arr, dims=None if len(names) == 0 else names)
    elif type(arr) in [OrderedDict, dict]:
        return xarray.concat([xarray_unravel_levels(item, names[1:]) for k, item in arr.items()],
                             pd.Index(arr.keys(), name=names[0]))
    elif type(arr) == xarray.DataArray:
        return arr
    else:
        return my_convert(arr)
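For example, feeding the helper above a two-level nested dict of DataFrames stacks the outer keys into new named dimensions; the level names and sample frames below are made up for illustration:

from collections import OrderedDict

import pandas as pd

def make_df(seed):
    return pd.DataFrame([[seed, seed + 1], [seed + 2, seed + 3]],
                        index=['r0', 'r1'], columns=['c0', 'c1'])

nested = OrderedDict([
    ('exp_a', OrderedDict([('trial1', make_df(0)), ('trial2', make_df(10))])),
    ('exp_b', OrderedDict([('trial1', make_df(20)), ('trial2', make_df(30))])),
])

arr = xarray_unravel_levels(nested, names=['experiment', 'trial'])
print(arr.dims)   # ('experiment', 'trial', 'dim_0', 'dim_1') -- unnamed frame axes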
def concat_instances(data_sets: Iterable[DataSet]) -> DataSet:
    """
    Concatenates the specified data sets along the instance dimension.

    All data sets must have exactly matching metadata.

    Parameters
    ----------
    data_sets: list of DataSet
        The data sets to concatenate

    Returns
    -------
    DataSet
        A new data set containing all instances in the specified data sets
    """
    # noinspection PyProtectedMember
    mutable = all([data_set._mutable for data_set in data_sets])

    filenames = np.concatenate([data_set._data[_DataVar.FILENAME].values for data_set in data_sets])
    if len(np.unique(filenames)) != len(filenames):
        raise ValueError("data sets contain duplicate instances - refusing to concatenate")

    # noinspection PyProtectedMember
    new_data = xr.concat([data_set._data for data_set in data_sets],
                         dim=_Dimension.INSTANCE,
                         data_vars="minimal",
                         compat="identical")

    # noinspection PyTypeChecker
    return DataSet(data=new_data,
                   mutable=mutable)
def _aggregate_state(self):
    """Aggregate data out of the state file and load it into `met_data`"""
    # Precipitation record
    begin_record = self.params['start'] - pd.Timedelta("90 days")
    end_record = self.params['start'] - pd.Timedelta("1 days")
    record_dates = date_range(begin_record, end_record,
                              calendar=self.params['calendar'])
    trailing = self.state['prec'].sel(time=record_dates)
    total_precip = xr.concat([trailing, self.met_data['prec']], dim='time')
    total_precip = (cnst.DAYS_PER_YEAR * total_precip.rolling(
        time=90).mean().drop(record_dates, dim='time'))
    self.met_data['seasonal_prec'] = total_precip

    # Smoothed daily temperature range
    trailing = self.state['t_max'] - self.state['t_min']
    begin_record = self.params['start'] - pd.Timedelta("90 days")
    end_record = self.params['start'] - pd.Timedelta("1 days")
    record_dates = date_range(begin_record, end_record,
                              calendar=self.params['calendar'])
    trailing['time'] = record_dates
    dtr = self.met_data['t_max'] - self.met_data['t_min']
    sm_dtr = xr.concat([trailing, dtr], dim='time')
    sm_dtr = sm_dtr.rolling(time=30).mean().drop(record_dates, dim='time')
    self.met_data['smoothed_dtr'] = sm_dtr

    # Put in SWE data
    self.state['swe'] = self.state.sel(time=end_record).swe.drop('time')
    self.met_data['swe'] = self.state.swe.copy()
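The method above repeats one pattern twice: prepend a 90-day trailing record from the state file, take a rolling mean over the combined series, then drop the prepended dates so only the actual run period keeps fully warmed-up smoothed values. A self-contained sketch of just that pattern, with synthetic data, plain pandas dates instead of the project's `date_range`, and `drop_sel` as the current spelling of the `drop(..., dim='time')` call used above:

import numpy as np
import pandas as pd
import xarray as xr

record_dates = pd.date_range('1999-10-03', '1999-12-31', freq='D')  # 90 trailing days
run_dates = pd.date_range('2000-01-01', '2000-12-31', freq='D')

trailing = xr.DataArray(np.random.rand(record_dates.size),
                        dims='time', coords={'time': record_dates})
prec = xr.DataArray(np.random.rand(run_dates.size),
                    dims='time', coords={'time': run_dates})

smoothed = (xr.concat([trailing, prec], dim='time')
            .rolling(time=90).mean()
            .drop_sel(time=record_dates))   # keep only the run period

assert smoothed.time.size == run_dates.size
assert not np.isnan(smoothed.values).any()  # window is already full on the first run day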
def concat_ml_features(*dsets, **kwargs):
    '''Concatenate MLDataset / Dataset (dsets) along concat_dim
    (by default the column dimension, typically called "layer").

    Parameters:
        :dsets: Any number of MLDataset / Dataset objects that are 2D
        :features_layer: Typically "layer", the column dimension
        :concat_dim: If None, the column dimension is guessed
        :keep_attrs: If True, keep the attrs of the first dset in *dsets

    TODO - Gui: This could use the astype logic discussed elsewhere?
    '''
    features_layer = kwargs.get('features_layer', FEATURES_LAYER)
    concat_dim = kwargs.get('concat_dim', None)
    keep_attrs = kwargs.get('keep_attrs', False)  # TODO True or False (convention?)

    from xarray_filters.mldataset import MLDataset

    if not dsets:
        raise ValueError('No MLDataset / Dataset arguments passed. Expected >= 1')
    if keep_attrs:
        attrs = deepcopy(dsets[0].attrs)
    else:
        attrs = OrderedDict()

    concat_dim = concat_dim or FEATURES_LAYER_DIMS[1]
    data_arrs = []
    for dset in dsets:
        if not isinstance(dset, (MLDataset, xr.Dataset)):
            raise ValueError('Expected an MLDataset or xarray.Dataset, '
                             'got {}'.format(type(dset)))
        data_arr = dset.data_vars.get(features_layer, None)
        if data_arr is None:
            raise ValueError('Expected a "{}" data variable in each dset'.format(features_layer))
        data_arrs.append(data_arr)

    data_arr = xr.concat(data_arrs, dim=concat_dim)
    return MLDataset(OrderedDict([(features_layer, data_arr)]), attrs=attrs)
def expand_categoricals(x, drop_first):
    if x.shape[1] == 0:
        return x
    return pd.concat([convert_columns(x[c], drop_first) for c in x.columns], axis=1)
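`expand_categoricals` delegates the per-column work to `convert_columns`, which is not shown here. A plausible stand-in, assuming the intent is to expand categorical columns to dummy indicators and pass everything else through unchanged (the helper name and data below are hypothetical):

import pandas as pd

def convert_columns_sketch(col: pd.Series, drop_first: bool) -> pd.DataFrame:
    # Hypothetical equivalent of convert_columns: categoricals become dummies,
    # all other columns are passed through as single-column frames.
    if isinstance(col.dtype, pd.CategoricalDtype):
        return pd.get_dummies(col, drop_first=drop_first)
    return col.to_frame()

x = pd.DataFrame({'size': pd.Categorical(['s', 'm', 'l']), 'price': [1.0, 2.0, 3.0]})
expanded = pd.concat([convert_columns_sketch(x[c], True) for c in x.columns], axis=1)
print(expanded.columns.tolist())   # ['m', 's', 'price'] with drop_first=True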
def concat_features(data_sets: Iterable[DataSet],
                    dimension: str = "generated") -> DataSet:
    """
    Concatenates the specified data sets along the specified feature dimension.

    The feature matrices of each instance are concatenated along the specified dimension. All data sets must have
    the specified feature dimension. Additionally, all metadata, including names and sizes of other feature
    dimensions, must be identical for all data sets.

    Parameters
    ----------
    data_sets: list of DataSet
        Data sets which should be concatenated
    dimension: str, default "generated"
        Feature dimension along which features should be concatenated

    Returns
    -------
    DataSet
        A new data set created by concatenating the specified data sets along the specified feature dimension

    Raises
    ------
    ValueError
        If the specified feature dimension is not present in some data sets
    """
    for data_set in data_sets:
        # noinspection PyProtectedMember
        if dimension not in data_set._data.dims:
            raise ValueError("dimension '{}' missing in some data sets".format(dimension))

    # noinspection PyProtectedMember
    mutable = all([data_set._mutable for data_set in data_sets])

    # noinspection PyProtectedMember
    new_data = xr.concat([data_set._data for data_set in data_sets],
                         dim=dimension,
                         data_vars="minimal",
                         compat="identical")

    # noinspection PyTypeChecker
    return DataSet(data=new_data,
                   mutable=mutable)
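Both `concat_instances` and `concat_features` pass `data_vars='minimal'` and `compat='identical'` to `xr.concat`: only variables that actually contain the concatenation dimension are concatenated, while every other variable must be identical across the inputs or `xr.concat` raises. A small standalone sketch of those two options, with made-up variable names:

import numpy as np
import xarray as xr

def make_part():
    return xr.Dataset(
        {'features': (('instance', 'generated'), np.random.rand(2, 3)),
         'label_map': ('label', np.array(['neg', 'pos']))})  # identical in every part

a, b = make_part(), make_part()
merged = xr.concat([a, b], dim='instance', data_vars='minimal', compat='identical')
print(dict(merged.sizes))   # {'instance': 4, 'generated': 3, 'label': 2}

# With compat='identical', changing a non-concatenated variable makes concat fail:
b['label_map'] = ('label', np.array(['neg', 'POS']))
# xr.concat([a, b], dim='instance', data_vars='minimal', compat='identical')  -> raises MergeError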