def concat_ml_features(*dsets, **kwargs):
    '''Concatenate the feature arrays of several MLDataset / Dataset objects.

    The data variable named ``features_layer`` is pulled out of every
    dataset in ``dsets``, the resulting arrays are joined with
    ``xr.concat`` along ``concat_dim``, and the result is wrapped in a new
    MLDataset holding that single data variable.

    Parameters:
        :dsets: One or more MLDataset / xr.Dataset objects, each holding a
                data variable named ``features_layer``.  The original
                notes say these are 2D; that is not validated here.
        :features_layer: Name of the data variable to concatenate.
                Defaults to the module-level FEATURES_LAYER.
        :concat_dim: Dimension to concatenate along.  If None (or any
                other falsy value), FEATURES_LAYER_DIMS[1] is used.
        :keep_attrs: If True, the returned MLDataset gets a deep copy of
                the attrs of the first dataset in ``dsets``; otherwise it
                gets an empty OrderedDict.  Defaults to False.

    Returns:
        MLDataset with one data variable, ``features_layer``.

    Raises:
        ValueError: if no datasets are passed, if an argument is not an
                MLDataset / xr.Dataset, or if a dataset has no data
                variable named ``features_layer``.

    Keyword arguments other than the three listed above are ignored.

    TODO - Gui: This could use the astype logic discussed elsewhere?
    '''
    features_layer = kwargs.get('features_layer', FEATURES_LAYER)
    concat_dim = kwargs.get('concat_dim', None)
    keep_attrs = kwargs.get('keep_attrs', False)  # TODO True or False (convention?)
    # Function-scope import kept as in the original
    # (presumably to avoid a circular import - TODO confirm).
    from xarray_filters.mldataset import MLDataset
    if not dsets:
        raise ValueError('No MLDataset / Dataset arguments passed. Expected >= 1')
    concat_dim = concat_dim or FEATURES_LAYER_DIMS[1]
    data_arrs = []
    for idx, dset in enumerate(dsets):
        if not isinstance(dset, (MLDataset, xr.Dataset)):
            raise ValueError(
                'Expected every positional argument to be an MLDataset or '
                'xarray.Dataset, but argument {} has type {}'.format(
                    idx, type(dset).__name__))
        data_arr = dset.data_vars.get(features_layer, None)
        if data_arr is None:
            raise ValueError(
                'Argument {} has no data variable named {!r} '
                '(available data variables: {})'.format(
                    idx, features_layer, list(dset.data_vars)))
        data_arrs.append(data_arr)
    # Copy attrs only after validation so that a non-Dataset first argument
    # is reported by the type check above rather than by an AttributeError.
    if keep_attrs:
        attrs = deepcopy(dsets[0].attrs)
    else:
        attrs = OrderedDict()
    data_arr = xr.concat(data_arrs, dim=concat_dim)
    return MLDataset(OrderedDict([(features_layer, data_arr)]), attrs=attrs)
评论列表
文章目录