diff --git a/ci/310.yaml b/ci/310.yaml index 3fe24e5..5979c3b 100644 --- a/ci/310.yaml +++ b/ci/310.yaml @@ -19,4 +19,7 @@ dependencies: - geopandas-base - geodatasets - pyogrio + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/311.yaml b/ci/311.yaml index 3c6bf40..1fe52c1 100644 --- a/ci/311.yaml +++ b/ci/311.yaml @@ -19,4 +19,7 @@ dependencies: - geopandas-base - geodatasets - pyogrio + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/312.yaml b/ci/312.yaml index 08cef9e..24e8474 100644 --- a/ci/312.yaml +++ b/ci/312.yaml @@ -20,4 +20,7 @@ dependencies: - geodatasets - pyogrio - mypy + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/39.yaml b/ci/39.yaml index 0bda0b1..7382d33 100644 --- a/ci/39.yaml +++ b/ci/39.yaml @@ -19,3 +19,6 @@ dependencies: - geopandas-base - geodatasets - pyogrio + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/dev.yaml b/ci/dev.yaml index db48d96..57d4840 100644 --- a/ci/dev.yaml +++ b/ci/dev.yaml @@ -24,3 +24,4 @@ dependencies: - git+https://github.com/shapely/shapely.git@main - git+https://github.com/pydata/xarray.git@main - git+https://github.com/pyproj4/pyproj.git + - exactextract==0.2.0.dev0 diff --git a/doc/source/conf.py b/doc/source/conf.py index b266cc9..695a1c4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -45,6 +45,7 @@ "geopandas": ("https://geopandas.org/en/latest", None), "pandas": ("https://pandas.pydata.org/docs", None), "rasterio": ("https://rasterio.readthedocs.io/en/latest/", None), + "exactextract": ("https://isciences.github.io/exactextract/", None), } # -- Options for HTML output ------------------------------------------------- diff --git a/doc/source/zonal_stats.ipynb b/doc/source/zonal_stats.ipynb index 9a0ac80..12d77b0 100644 --- a/doc/source/zonal_stats.ipynb +++ b/doc/source/zonal_stats.ipynb @@ -4698,6 +4698,690 @@ "aggregated_iterative" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another option is to use method=\"exactextract\", which is using exactextract.exact_extract. It provides a fast and accurate statistcs by determining the fraction of each pixel that is covered by the polygon. On the other hand, the aggregation options limited to be string or list of strings (e.g., \"mean\",\"sum\"), the quantile option should be in this pattern quantile(q=0.20)." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 19kB\n",
+       "Dimensions:   (geometry: 127, month: 2, level: 3)\n",
+       "Coordinates:\n",
+       "  * geometry  (geometry) object 1kB POLYGON ((-59.57209469261153 -80.04017872...\n",
+       "  * month     (month) int32 8B 1 7\n",
+       "  * level     (level) int32 12B 200 500 850\n",
+       "Data variables:\n",
+       "    z         (geometry, month, level) float64 6kB 1.1e+05 ... 1.394e+04\n",
+       "    u         (geometry, month, level) float64 6kB 2.342 1.357 ... 2.395 0.8823\n",
+       "    v         (geometry, month, level) float64 6kB 0.4431 0.211 ... 1.116 0.6409\n",
+       "Indexes:\n",
+       "    geometry  GeometryIndex (crs=EPSG:4326)\n",
+       "Attributes:\n",
+       "    Conventions:  CF-1.0\n",
+       "    Info:         Monthly ERA-Interim data. Downloaded and edited by fabien.m...
" + ], + "text/plain": [ + " Size: 19kB\n", + "Dimensions: (geometry: 127, month: 2, level: 3)\n", + "Coordinates:\n", + " * geometry (geometry) object 1kB POLYGON ((-59.57209469261153 -80.04017872...\n", + " * month (month) int32 8B 1 7\n", + " * level (level) int32 12B 200 500 850\n", + "Data variables:\n", + " z (geometry, month, level) float64 6kB 1.1e+05 ... 1.394e+04\n", + " u (geometry, month, level) float64 6kB 2.342 1.357 ... 2.395 0.8823\n", + " v (geometry, month, level) float64 6kB 0.4431 0.211 ... 1.116 0.6409\n", + "Indexes:\n", + " geometry GeometryIndex (crs=EPSG:4326)\n", + "Attributes:\n", + " Conventions: CF-1.0\n", + " Info: Monthly ERA-Interim data. Downloaded and edited by fabien.m..." + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aggregated_iterative = ds.xvec.zonal_stats(\n", + " world.geometry,\n", + " x_coords=\"longitude\",\n", + " y_coords=\"latitude\",\n", + " method=\"exactextract\",\n", + ")\n", + "aggregated_iterative" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -5218,9 +5902,9 @@ ], "metadata": { "kernelspec": { - "display_name": "xvec", + "display_name": "xvec_dev", "language": "python", - "name": "python3" + "name": "xvec_dev" }, "language_info": { "codemirror_mode": { @@ -5232,9 +5916,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.12.3" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/pyproject.toml b/pyproject.toml index 38684a0..77de409 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,8 @@ exclude_lines = [ [tool.ruff] line-length = 88 -select = ["E", "F", "W", "I", "UP", "B", "A", "C4", "Q"] exclude = ["doc"] -target-version = "py39" + +[tool.ruff.lint] ignore = ['E501', 'Q000', 'Q001', 'Q002', 'Q003', 'W191'] +select = ["E", "F", "W", "I", "UP", "B", "A", "C4", "Q"] diff --git a/xvec/accessor.py b/xvec/accessor.py index f9d49a1..30e5f4d 100644 --- a/xvec/accessor.py +++ b/xvec/accessor.py @@ -11,7 +11,11 @@ from pyproj import CRS, Transformer from .index import GeometryIndex -from .zonal import _zonal_stats_iterative, _zonal_stats_rasterize +from .zonal import ( + _zonal_stats_exactextract, + _zonal_stats_iterative, + _zonal_stats_rasterize, +) if TYPE_CHECKING: from geopandas import GeoDataFrame @@ -950,15 +954,21 @@ def zonal_stats( name of the coordinates containing ``y`` coordinates (i.e. the second value in the coordinate pair encoding the vertex of the polygon) stats : string | Callable | Sequence[str | Callable | tuple] - Spatial aggregation statistic method, by default "mean". Any of the - aggregations available as :class:`xarray.DataArray` or - :class:`xarray.DataArrayGroupBy` methods like + Spatial aggregation statistic method, by default ``"mean"``. + + Any of the aggregations available as :class:`xarray.DataArray` or + :class:`~xarray.core.groupby.DataArrayGroupBy` methods like :meth:`~xarray.DataArray.mean`, :meth:`~xarray.DataArray.min`, :meth:`~xarray.DataArray.max`, or :meth:`~xarray.DataArray.quantile` are available. Alternatively, you can pass a ``Callable`` supported by :meth:`~xarray.DataArray.reduce` or a list with ``strings``, ``callables`` or ``tuples`` in a ``(name, func, {kwargs})`` format, where ``func`` can be a string or a callable. + + If the method is ``"exactextract"`` then the stats should be string or list + of strings that can be used to construct :class:`exactextract.Operation` + objects supported by :func:`exactextract.exact_extract` (e.g., ``"mean"``, + ``"quantile(q=0.20)"``). name : str, optional Name of the dimension that will hold the ``geometry``, by default "geometry" index : bool, optional @@ -969,16 +979,28 @@ def zonal_stats( will never be stored. This is useful as an attribute link between the resulting array and the GeoPandas object from which the geometry is sourced. method : str, optional - The method of data extraction. The default is ``"rasterize"``, which uses - :func:`rasterio.features.rasterize` and is faster, but can lead to loss of - information in case of small polygons or lines. Other option is - ``"iterate"``, which iterates over geometries and uses - :func:`rasterio.features.geometry_mask`. ``"iterate"`` method requires - ``joblib`` on top of ``rioxarray``. + The method of data extraction. + + ``"rasterize"`` + uses :func:`rasterio.features.rasterize` and is faster, but can lead to + loss of information in case of small polygons or lines. + + ``"iterate"`` + iterates over geometries and uses + :func:`rasterio.features.geometry_mask`. Requires ``joblib`` on top of + ``rioxarray``. + + ``"exactextract"`` + calculates precise stats by determining the fraction of each raster cell + that is covered by the polygon and uses + :func:`exactextract.exact_extract`. + + The default is ``"rasterize"``. all_touched : bool, optional If True, all pixels touched by geometries will be considered. If False, only pixels whose center is within the polygon or that are selected by - Bresenham’s line algorithm will be considered. + Bresenham’s line algorithm will be considered. Applies only if ``method="iterate"`` + or ``method="rasterize"``. n_jobs : int, optional Number of parallel threads to use. It is recommended to set this to the number of physical cores of the CPU. ``-1`` uses all available cores. @@ -1088,6 +1110,16 @@ def zonal_stats( n_jobs=n_jobs, **kwargs, ) + elif method == "exactextract": + result = _zonal_stats_exactextract( + self, + geometry=geometry, + x_coords=x_coords, + y_coords=y_coords, + stats=stats, + name=name, + **kwargs, + ) else: raise ValueError( f"method '{method}' is not supported. Allowed options are 'rasterize' " diff --git a/xvec/tests/test_zonal_stats.py b/xvec/tests/test_zonal_stats.py index 42e415e..c46aaf3 100644 --- a/xvec/tests/test_zonal_stats.py +++ b/xvec/tests/test_zonal_stats.py @@ -9,7 +9,7 @@ import xvec # noqa: F401 -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_structure(method): da = xr.DataArray( np.ones((10, 10, 5)), @@ -24,14 +24,22 @@ def test_structure(method): polygon2 = shapely.geometry.Polygon([(6, 22), (9, 22), (9, 29), (6, 26)]) polygons = gpd.GeoSeries([polygon1, polygon2], crs="EPSG:4326") - expected = xr.DataArray( - np.array([[12.0] * 5, [18.0] * 5]), - coords={ - "geometry": polygons, - "time": pd.date_range("2023-01-01", periods=5), - }, - ).xvec.set_geom_indexes("geometry", crs="EPSG:4326") - + if method == "exactextract": + expected = xr.DataArray( + np.array([[12.0] * 5, [16.5] * 5]), + coords={ + "geometry": polygons, + "time": pd.date_range("2023-01-01", periods=5), + }, + ).xvec.set_geom_indexes("geometry", crs="EPSG:4326") + else: + expected = xr.DataArray( + np.array([[12.0] * 5, [18.0] * 5]), + coords={ + "geometry": polygons, + "time": pd.date_range("2023-01-01", periods=5), + }, + ).xvec.set_geom_indexes("geometry", crs="EPSG:4326") actual = da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) xr.testing.assert_identical(actual, expected) @@ -43,35 +51,36 @@ def test_structure(method): ) # dataset - ds = da.to_dataset(name="test") - - expected_ds = expected.to_dataset(name="test").set_coords("geometry") - actual_ds = ds.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) - xr.testing.assert_identical(actual_ds, expected_ds) - - actual_ix_ds = ds.xvec.zonal_stats( - polygons, "x", "y", stats="sum", method=method, index=True - ) - xr.testing.assert_identical( - actual_ix_ds, expected_ds.assign_coords({"index": ("geometry", polygons.index)}) - ) + if method == "rasterize" or method == "iterate": + ds = da.to_dataset(name="test") + expected_ds = expected.to_dataset(name="test").set_coords("geometry") + actual_ds = ds.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) + xr.testing.assert_identical(actual_ds, expected_ds) + + actual_ix_ds = ds.xvec.zonal_stats( + polygons, "x", "y", stats="sum", method=method, index=True + ) + xr.testing.assert_identical( + actual_ix_ds, + expected_ds.assign_coords({"index": ("geometry", polygons.index)}), + ) - # named index - polygons.index.name = "my_index" - actual_ix_named = da.xvec.zonal_stats( - polygons, "x", "y", stats="sum", method=method - ) - xr.testing.assert_identical( - actual_ix_named, - expected.assign_coords({"my_index": ("geometry", polygons.index)}), - ) - actual_ix_names_ds = ds.xvec.zonal_stats( - polygons, "x", "y", stats="sum", method=method - ) - xr.testing.assert_identical( - actual_ix_names_ds, - expected_ds.assign_coords({"my_index": ("geometry", polygons.index)}), - ) + # named index + polygons.index.name = "my_index" + actual_ix_named = da.xvec.zonal_stats( + polygons, "x", "y", stats="sum", method=method + ) + xr.testing.assert_identical( + actual_ix_named, + expected.assign_coords({"my_index": ("geometry", polygons.index)}), + ) + actual_ix_names_ds = ds.xvec.zonal_stats( + polygons, "x", "y", stats="sum", method=method + ) + xr.testing.assert_identical( + actual_ix_names_ds, + expected_ds.assign_coords({"my_index": ("geometry", polygons.index)}), + ) def test_match(): @@ -87,25 +96,37 @@ def test_match(): xr.testing.assert_allclose(rasterize, iterate) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_dataset(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) result = ds.xvec.zonal_stats(world.geometry, "longitude", "latitude", method=method) - xr.testing.assert_allclose( - xr.Dataset( - { - "z": np.array(61367.76185577), - "u": np.array(4.19631497), - "v": np.array(-0.49170332), - } - ), - result.mean(), - ) + if method == "exactextract": + xr.testing.assert_allclose( + xr.Dataset( + { + "z": np.array(61625.53438858), + "u": np.array(4.15009377), + "v": np.array(-0.5161478), + } + ), + result.mean(), + ) + else: + xr.testing.assert_allclose( + xr.Dataset( + { + "z": np.array(61367.76185577), + "u": np.array(4.19631497), + "v": np.array(-0.49170332), + } + ), + result.mean(), + ) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_dataarray(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) @@ -115,10 +136,13 @@ def test_dataarray(method): assert result.shape == (127, 2, 3) assert result.dims == ("geometry", "month", "level") - assert result.mean() == pytest.approx(61367.76185577) + if method == "exactextract": + assert result.mean() == pytest.approx(61625.53438858) + else: + assert result.mean() == pytest.approx(61367.76185577) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_stat(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) @@ -129,13 +153,32 @@ def test_stat(method): median_ = ds.z.xvec.zonal_stats( world.geometry, "longitude", "latitude", method=method, stats="median" ) - quantile_ = ds.z.xvec.zonal_stats( - world.geometry, "longitude", "latitude", method=method, stats="quantile", q=0.2 - ) + if method == "exactextract": + quantile_ = ds.z.xvec.zonal_stats( + world.geometry, + "longitude", + "latitude", + method=method, + stats="quantile(q=0.2)", + ) + else: + quantile_ = ds.z.xvec.zonal_stats( + world.geometry, + "longitude", + "latitude", + method=method, + stats="quantile", + q=0.2, + ) - assert mean_.mean() == pytest.approx(61367.76185577) - assert median_.mean() == pytest.approx(61370.18563539) - assert quantile_.mean() == pytest.approx(61279.93619836) + if method == "exactextract": + assert mean_.mean() == pytest.approx(61625.53438858) + assert median_.mean() == pytest.approx(61628.67168691) + assert quantile_.mean() == pytest.approx(61540.75632235) + else: + assert mean_.mean() == pytest.approx(61367.76185577) + assert median_.mean() == pytest.approx(61370.18563539) + assert quantile_.mean() == pytest.approx(61279.93619836) @pytest.mark.parametrize("method", ["rasterize", "iterate"]) @@ -207,8 +250,15 @@ def test_crs(method): }, ).xvec.set_geom_indexes("geometry", crs=None) - actual = da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) - xr.testing.assert_identical(actual, expected) + if method == "exactextract": + with pytest.raises( + AttributeError, + match="Geometry input does not have a Coordinate Reference System", + ): + da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) + else: + actual = da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) + xr.testing.assert_identical(actual, expected) @pytest.mark.parametrize("method", ["rasterize", "iterate"]) @@ -237,40 +287,65 @@ def test_callable(method): xr.testing.assert_identical(da_agg, da_std) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_multiple(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) - result = ds.xvec.zonal_stats( - world.geometry[:10].boundary, - "longitude", - "latitude", - stats=[ - "mean", - "sum", - ("quantile", "quantile", {"q": [0.1, 0.2, 0.3]}), - ("numpymean", np.nanmean), - np.nanmean, - ], - method=method, - n_jobs=1, - ) - assert sorted(result.dims) == sorted( - [ - "level", - "zonal_statistics", - "geometry", - "month", - "quantile", - ] - ) + if method == "exactextract": + result = ds.xvec.zonal_stats( + world.geometry[:10].boundary, + "longitude", + "latitude", + stats=[ + "mean", + "sum", + "quantile(q=0.20)", + ], + method=method, + n_jobs=1, + ) + assert sorted(result.dims) == sorted( + [ + "level", + "zonal_statistics", + "geometry", + "month", + ] + ) + + assert (result.zonal_statistics == ["mean", "sum", "quantile(q=0.20)"]).all() + else: + result = ds.xvec.zonal_stats( + world.geometry[:10].boundary, + "longitude", + "latitude", + stats=[ + "mean", + "sum", + ("quantile", "quantile", {"q": [0.1, 0.2, 0.3]}), + ("numpymean", np.nanmean), + np.nanmean, + ], + method=method, + n_jobs=1, + ) + assert sorted(result.dims) == sorted( + [ + "level", + "zonal_statistics", + "geometry", + "month", + "quantile", + ] + ) - assert ( - result.zonal_statistics == ["mean", "sum", "quantile", "numpymean", "nanmean"] - ).all() + assert ( + result.zonal_statistics + == ["mean", "sum", "quantile", "numpymean", "nanmean"] + ).all() -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_invalid(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) diff --git a/xvec/zonal.py b/xvec/zonal.py index 5ed8159..060eff1 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -283,3 +283,214 @@ def _agg_geom( gc.collect() return result + + +def _zonal_stats_exactextract( + acc, + geometry: Sequence[shapely.Geometry], + x_coords: Hashable, + y_coords: Hashable, + stats: str | Callable | Sequence[str | Callable | tuple] = "mean", + name: str = "geometry", +) -> xr.DataArray | xr.Dataset: + """Extract the values from a dataset indexed by a set of geometries + + The CRS of the raster and that of geometry need to be equal. + Xvec does not verify their equality. + + Parameters + ---------- + geometry : Sequence[shapely.Geometry] + An arrray-like (1-D) of shapely geometries, like a numpy array or + :class:`geopandas.GeoSeries`. + x_coords : Hashable + name of the coordinates containing ``x`` coordinates (i.e. the first value + in the coordinate pair encoding the vertex of the polygon) + y_coords : Hashable + name of the coordinates containing ``y`` coordinates (i.e. the second value + in the coordinate pair encoding the vertex of the polygon) + stats : Hashable + Spatial aggregation statistic method, by default "mean". Any of the + strings that can be used to construct :py:class:`Operation` objects + supported by :func:`exactextract.exact_extract` (e.g., ``"mean"``, + ``"quantile(q=0.20)"``) + name : str, optional + Name of the dimension that will hold the ``geometry``, by default "geometry" + + Returns + ------- + Dataset | DataArray + A subset of the original object with N-1 dimensions indexed by + the the GeometryIndex. + + """ + if hasattr(geometry, "crs"): + crs = geometry.crs # type: ignore + else: + raise AttributeError( + "Geometry input does not have a Coordinate Reference System (CRS)." + ) + + # the input should be xarray.DataArray + original_is_ds = False + if not isinstance(acc._obj, xr.core.dataarray.DataArray): + original_ds = acc._obj + acc._obj = acc._obj.to_dataarray() + original_is_ds = True + + # Unstack the results + agg = {} + if pd.api.types.is_list_like(stats): + for stat in stats: # type: ignore + if not isinstance(stat, str): + raise ValueError(f"{stat} is not a valid aggregation.") + + results, original_shape, coords_info, locs = _agg_exactextract( + acc, geometry, crs, x_coords, y_coords, stats, name, original_is_ds + ) + i = 0 + for stat in stats: # type: ignore + df = results.iloc[:, i : i + locs] + # Unstack the result + arr = df.values.reshape(original_shape) + if original_is_ds is True: + data_vars = {} + for idx, data_var in enumerate(acc._obj["variable"].values): + data_vars[data_var] = (coords_info.keys(), arr[:, idx]) + result = xr.Dataset( + data_vars=data_vars, + coords=coords_info, + ).xvec.set_geom_indexes(name, crs=crs) + else: + result = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) + + agg[stat] = result + i += locs + vec_cube = xr.concat( + agg.values(), + dim=xr.DataArray( + list(agg.keys()), name="zonal_statistics", dims="zonal_statistics" + ), + ) + elif isinstance(stats, str): + results, original_shape, coords_info, _ = _agg_exactextract( + acc, geometry, crs, x_coords, y_coords, stats, name, original_is_ds + ) + # Unstack the result + arr = results.values.reshape(original_shape) + if original_is_ds is True: + data_vars = {} + for idx, data_var in enumerate(acc._obj["variable"].values): + data_vars[data_var] = (coords_info.keys(), arr[:, idx]) + vec_cube = xr.Dataset( + data_vars=data_vars, + coords=coords_info, + ).xvec.set_geom_indexes(name, crs=crs) + else: + vec_cube = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) + else: + raise ValueError(f"{stats} is not a valid aggregation.") + + vec_cube.attrs = acc._obj.attrs + + if original_is_ds: + acc._obj = original_ds + + return vec_cube + + +def _agg_exactextract( + acc, + geometry, + crs, + x_coords: Hashable, + y_coords: Hashable, + stats: str | Callable | Iterable[str | Callable | tuple] = "mean", + name: str = "geometry", + original_is_ds: bool = False, +): + """Extract the values from a dataset indexed by a set of geometries + + The CRS of the raster and that of geometry need to be equal. + Xvec does not verify their equality. + + Parameters + ---------- + geometry : Sequence[shapely.Geometry] + An arrray-like (1-D) of shapely geometries, like a numpy array or + :class:`geopandas.GeoSeries`. + crs : Coordinate Reference System of the geometry objects. + x_coords : Hashable + name of the coordinates containing ``x`` coordinates (i.e. the first value + in the coordinate pair encoding the vertex of the polygon) + y_coords : Hashable + name of the coordinates containing ``y`` coordinates (i.e. the second value + in the coordinate pair encoding the vertex of the polygon) + stats : Hashable + Spatial aggregation statistic method, by default "mean". Any of the + strings that can be used to construct :py:class:`Operation` objects + supported by :func:`exactextract.exact_extract` (e.g., ``"mean"``, + ``"quantile(q=0.20)"``) + name : str, optional + Name of the dimension that will hold the ``geometry``, by default "geometry" + original_is_ds : bool + If True, all pixels touched by geometries will be considered. If False, only + pixels whose center is within the polygon or that are selected by + Bresenham’s line algorithm will be considered. + + Returns + ------- + pandas.DataFrame + Aggregated values over the geometry. + + """ + try: + import exactextract + except ImportError as err: + raise ImportError( + "The exactextract package is required for `zonal_stats()` with " + "method='exactextract'." + ) from err + try: + import geopandas as gpd + except ImportError as err: + raise ImportError( + "The geopandas package is required for `zonal_stats()` with " + "method='exactextract'. " + "You can install it using 'conda install -c conda-forge geopandas' or " + "'pip install geopandas'." + ) from err + + # Stack the other dimensions into one dimension called "location" + arr_dims = tuple(dim for dim in acc._obj.dims if dim not in [x_coords, y_coords]) + data = acc._obj.stack(location=arr_dims) + locs = data.location.size + + # Check the order of dimensions + data = data.transpose("location", y_coords, x_coords) + + # Aggregation result + gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs) + results = exactextract.exact_extract(rast=data, vec=gdf, ops=stats, output="pandas") + # Get all the dimensions execpt x_coords, y_coords, they will be used to stack the + # dataarray later + if original_is_ds is True: + # Get the original dataset information to use for unstacking the resulte later + coords_info = {name: geometry} + original_shape = [len(geometry)] + for dim in arr_dims: + original_shape.append(acc._obj[dim].size) + if dim != "variable": + coords_info[dim] = acc._obj[dim].values + else: + # Get the original dataarray information to use for unstacking the resulte later + coords_info = {name: geometry} + original_shape = [len(geometry)] + for dim in arr_dims: + original_shape.append(acc._obj[dim].size) + coords_info[dim] = acc._obj[dim].values + return results, original_shape, coords_info, locs