diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 3f936506234..076b0eb452a 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -41,6 +41,7 @@ core.rolling.DatasetCoarsen.all core.rolling.DatasetCoarsen.any + core.rolling.DatasetCoarsen.construct core.rolling.DatasetCoarsen.count core.rolling.DatasetCoarsen.max core.rolling.DatasetCoarsen.mean @@ -185,6 +186,7 @@ core.rolling.DataArrayCoarsen.all core.rolling.DataArrayCoarsen.any + core.rolling.DataArrayCoarsen.construct core.rolling.DataArrayCoarsen.count core.rolling.DataArrayCoarsen.max core.rolling.DataArrayCoarsen.mean diff --git a/doc/howdoi.rst b/doc/howdoi.rst index 9f985b66171..c518b0daba6 100644 --- a/doc/howdoi.rst +++ b/doc/howdoi.rst @@ -24,7 +24,7 @@ How do I ... * - change the order of dimensions - :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose` * - reshape dimensions - - :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack` + - :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`, :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` * - remove a variable from my object - :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars` * - remove dimensions of length 1 or 0 diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 451bbfdfba4..04bd6272fec 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,8 @@ v0.18.3 (unreleased) New Features ~~~~~~~~~~~~ +- Added :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` (:issue:`5454`, :pull:`5475`). + By `Deepak Cherian `_. - Xarray now uses consolidated metadata by default when writing and reading Zarr stores (:issue:`5251`). By `Stephan Hoyer `_. @@ -724,7 +726,7 @@ Documentation By `Pieter Gijsbers `_. - Fix grammar and typos in the :doc:`contributing` guide (:pull:`4545`). By `Sahid Velji `_. -- Fix grammar and typos in the :doc:`io` guide (:pull:`4553`). +- Fix grammar and typos in the :doc:`user-guide/io` guide (:pull:`4553`). By `Sahid Velji `_. - Update link to NumPy docstring standard in the :doc:`contributing` guide (:pull:`4558`). By `Sahid Velji `_. @@ -3033,7 +3035,7 @@ Documentation - Added apply_ufunc example to :ref:`/examples/weather-data.ipynb#Toy-weather-data` (:issue:`1844`). By `Liam Brannigan `_. - New entry `Why don’t aggregations return Python scalars?` in the - :doc:`faq` (:issue:`1726`). + :doc:`getting-started-guide/faq` (:issue:`1726`). By `0x0L `_. Enhancements diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 870df122aa9..b87dcda24b0 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,4 +1,5 @@ import functools +import itertools import warnings from typing import Any, Callable, Dict @@ -8,6 +9,7 @@ from .arithmetic import CoarsenArithmetic from .options import _get_keep_attrs from .pycompat import is_duck_dask_array +from .utils import either_dict_or_kwargs try: import bottleneck @@ -845,6 +847,109 @@ def __repr__(self): klass=self.__class__.__name__, attrs=",".join(attrs) ) + def construct( + self, + window_dim=None, + keep_attrs=None, + **window_dim_kwargs, + ): + """ + Convert this Coarsen object to a DataArray or Dataset, + where the coarsening dimension is split or reshaped to two + new dimensions. + + Parameters + ---------- + window_dim: mapping + A mapping from existing dimension name to new dimension names. + The size of the second dimension will be the length of the + coarsening window. + keep_attrs: bool, optional + Preserve attributes if True + **window_dim_kwargs : {dim: new_name, ...} + The keyword arguments form of ``window_dim``. + + Returns + ------- + Dataset or DataArray with reshaped dimensions + + Examples + -------- + >>> da = xr.DataArray(np.arange(24), dims="time") + >>> da.coarsen(time=12).construct(time=("year", "month")) + + array([[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], + [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]]) + Dimensions without coordinates: year, month + + See Also + -------- + DataArrayRolling.construct + DatasetRolling.construct + """ + + from .dataarray import DataArray + from .dataset import Dataset + + window_dim = either_dict_or_kwargs( + window_dim, window_dim_kwargs, "Coarsen.construct" + ) + if not window_dim: + raise ValueError( + "Either window_dim or window_dim_kwargs need to be specified." + ) + + bad_new_dims = tuple( + win + for win, dims in window_dim.items() + if len(dims) != 2 or isinstance(dims, str) + ) + if bad_new_dims: + raise ValueError( + f"Please provide exactly two dimension names for the following coarsening dimensions: {bad_new_dims}" + ) + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=True) + + missing_dims = set(window_dim) - set(self.windows) + if missing_dims: + raise ValueError( + f"'window_dim' must contain entries for all dimensions to coarsen. Missing {missing_dims}" + ) + extra_windows = set(self.windows) - set(window_dim) + if extra_windows: + raise ValueError( + f"'window_dim' includes dimensions that will not be coarsened: {extra_windows}" + ) + + reshaped = Dataset() + if isinstance(self.obj, DataArray): + obj = self.obj._to_temp_dataset() + else: + obj = self.obj + + reshaped.attrs = obj.attrs if keep_attrs else {} + + for key, var in obj.variables.items(): + reshaped_dims = tuple( + itertools.chain(*[window_dim.get(dim, [dim]) for dim in list(var.dims)]) + ) + if reshaped_dims != var.dims: + windows = {w: self.windows[w] for w in window_dim if w in var.dims} + reshaped_var, _ = var.coarsen_reshape(windows, self.boundary, self.side) + attrs = var.attrs if keep_attrs else {} + reshaped[key] = (reshaped_dims, reshaped_var, attrs) + else: + reshaped[key] = var + + should_be_coords = set(window_dim) & set(self.obj.coords) + result = reshaped.set_coords(should_be_coords) + if isinstance(self.obj, DataArray): + return self.obj._from_temp_dataset(result) + else: + return result + class DataArrayCoarsen(Coarsen): __slots__ = () diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 67ff6c6c9db..78dae44bed7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2158,7 +2158,7 @@ def coarsen( if not windows: return self._replace(attrs=_attrs) - reshaped, axes = self._coarsen_reshape(windows, boundary, side) + reshaped, axes = self.coarsen_reshape(windows, boundary, side) if isinstance(func, str): name = func func = getattr(duck_array_ops, name, None) @@ -2167,7 +2167,7 @@ def coarsen( return self._replace(data=func(reshaped, axis=axes, **kwargs), attrs=_attrs) - def _coarsen_reshape(self, windows, boundary, side): + def coarsen_reshape(self, windows, boundary, side): """ Construct a reshaped-array for coarsen """ @@ -2183,7 +2183,9 @@ def _coarsen_reshape(self, windows, boundary, side): for d, window in windows.items(): if window <= 0: - raise ValueError(f"window must be > 0. Given {window}") + raise ValueError( + f"window must be > 0. Given {window} for dimension {d}" + ) variable = self for d, window in windows.items(): @@ -2193,8 +2195,8 @@ def _coarsen_reshape(self, windows, boundary, side): if boundary[d] == "exact": if n * window != size: raise ValueError( - "Could not coarsen a dimension of size {} with " - "window {}".format(size, window) + f"Could not coarsen a dimension of size {size} with " + f"window {window} and boundary='exact'. Try a different 'boundary' option." ) elif boundary[d] == "trim": if side[d] == "left": diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index ca4725a579f..503c742252a 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -5,7 +5,14 @@ import xarray as xr from xarray import DataArray, Dataset, set_options -from . import assert_allclose, assert_equal, has_dask, requires_cftime +from . import ( + assert_allclose, + assert_equal, + assert_identical, + has_dask, + raise_if_dask_computes, + requires_cftime, +) from .test_dataarray import da from .test_dataset import ds @@ -299,3 +306,73 @@ def test_coarsen_da_reduce(da, window, name): actual = coarsen_obj.reduce(getattr(np, f"nan{name}")) expected = getattr(coarsen_obj, name)() assert_allclose(actual, expected) + + +@pytest.mark.parametrize("dask", [True, False]) +def test_coarsen_construct(dask): + + ds = Dataset( + { + "vart": ("time", np.arange(48), {"a": "b"}), + "varx": ("x", np.arange(10), {"a": "b"}), + "vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}), + "vary": ("y", np.arange(12)), + }, + coords={"time": np.arange(48), "y": np.arange(12)}, + attrs={"foo": "bar"}, + ) + + if dask and has_dask: + ds = ds.chunk({"x": 4, "time": 10}) + + expected = xr.Dataset(attrs={"foo": "bar"}) + expected["vart"] = (("year", "month"), ds.vart.data.reshape((-1, 12)), {"a": "b"}) + expected["varx"] = (("x", "x_reshaped"), ds.varx.data.reshape((-1, 5)), {"a": "b"}) + expected["vartx"] = ( + ("x", "x_reshaped", "year", "month"), + ds.vartx.data.reshape(2, 5, 4, 12), + {"a": "b"}, + ) + expected["vary"] = ds.vary + expected.coords["time"] = (("year", "month"), ds.time.data.reshape((-1, 12))) + + with raise_if_dask_computes(): + actual = ds.coarsen(time=12, x=5).construct( + {"time": ("year", "month"), "x": ("x", "x_reshaped")} + ) + assert_identical(actual, expected) + + with raise_if_dask_computes(): + actual = ds.coarsen(time=12, x=5).construct( + time=("year", "month"), x=("x", "x_reshaped") + ) + assert_identical(actual, expected) + + with raise_if_dask_computes(): + actual = ds.coarsen(time=12, x=5).construct( + {"time": ("year", "month"), "x": ("x", "x_reshaped")}, keep_attrs=False + ) + for var in actual: + assert actual[var].attrs == {} + assert actual.attrs == {} + + with raise_if_dask_computes(): + actual = ds.vartx.coarsen(time=12, x=5).construct( + {"time": ("year", "month"), "x": ("x", "x_reshaped")} + ) + assert_identical(actual, expected["vartx"]) + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct(foo="bar") + + with pytest.raises(ValueError): + ds.coarsen(time=12, x=2).construct(time=("year", "month")) + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct() + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct(time="bar") + + with pytest.raises(ValueError): + ds.coarsen(time=12).construct(time=("bar",))