From 725f6d50770f767fd0ee6f26bac69e3100a0ae89 Mon Sep 17 00:00:00 2001 From: masawdah Date: Sat, 22 Jun 2024 13:46:23 +0200 Subject: [PATCH 01/16] exactextract --- ci/310.yaml | 1 + ci/311.yaml | 1 + ci/312.yaml | 1 + ci/39.yaml | 1 + ci/dev.yaml | 1 + xvec/accessor.py | 16 +++++++- xvec/zonal.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 119 insertions(+), 1 deletion(-) diff --git a/ci/310.yaml b/ci/310.yaml index 3fe24e5..6971a4f 100644 --- a/ci/310.yaml +++ b/ci/310.yaml @@ -11,6 +11,7 @@ dependencies: - rasterio - tqdm - pyproj + - exactextract # testing - pytest - pytest-cov diff --git a/ci/311.yaml b/ci/311.yaml index 3c6bf40..361559a 100644 --- a/ci/311.yaml +++ b/ci/311.yaml @@ -11,6 +11,7 @@ dependencies: - rasterio - tqdm - pyproj + - exactextract # testing - pytest - pytest-cov diff --git a/ci/312.yaml b/ci/312.yaml index 08cef9e..6820faf 100644 --- a/ci/312.yaml +++ b/ci/312.yaml @@ -11,6 +11,7 @@ dependencies: - rasterio - tqdm - pyproj + - exactextract # testing - pytest - pytest-cov diff --git a/ci/39.yaml b/ci/39.yaml index 0bda0b1..a0d3c47 100644 --- a/ci/39.yaml +++ b/ci/39.yaml @@ -11,6 +11,7 @@ dependencies: - rasterio - tqdm - pyproj + - exactextract # testing - pytest - pytest-cov diff --git a/ci/dev.yaml b/ci/dev.yaml index db48d96..8c36ffe 100644 --- a/ci/dev.yaml +++ b/ci/dev.yaml @@ -11,6 +11,7 @@ dependencies: - joblib - rasterio - tqdm + - exactextract # testing - pytest - pytest-cov diff --git a/xvec/accessor.py b/xvec/accessor.py index f9d49a1..f2780af 100644 --- a/xvec/accessor.py +++ b/xvec/accessor.py @@ -11,7 +11,11 @@ from pyproj import CRS, Transformer from .index import GeometryIndex -from .zonal import _zonal_stats_iterative, _zonal_stats_rasterize +from .zonal import ( + _zonal_stats_exactextract, + _zonal_stats_iterative, + _zonal_stats_rasterize, +) if TYPE_CHECKING: from geopandas import GeoDataFrame @@ -1088,6 +1092,16 @@ def zonal_stats( n_jobs=n_jobs, **kwargs, ) + elif method == "exactextract": + result = _zonal_stats_exactextract( + self, + geometry=geometry, + x_coords=x_coords, + y_coords=y_coords, + stats=stats, + name=name, + **kwargs, + ) else: raise ValueError( f"method '{method}' is not supported. Allowed options are 'rasterize' " diff --git a/xvec/zonal.py b/xvec/zonal.py index 5ed8159..487c978 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -283,3 +283,102 @@ def _agg_geom( gc.collect() return result + + +def _zonal_stats_exactextract( + acc, + geometry: Sequence[shapely.Geometry], + x_coords: Hashable, + y_coords: Hashable, + stats: str | Callable | Sequence[str | Callable | tuple] = "mean", + name: str = "geometry", + **kwargs, +) -> xr.DataArray: + try: + import exactextract + except ImportError as err: + raise ImportError( + "The exactextract package is required for `zonal_stats()`. " + "You can install it using or 'pip install exactextract'." + ) from err + + try: + import geopandas as gpd + except ImportError as err: + raise ImportError( + "The geopandas package is required for `xvec.to_geodataframe()`. " + "You can install it using 'conda install -c conda-forge geopandas' or " + "'pip install geopandas'." + ) from err + + if hasattr(geometry, "crs"): + crs = geometry.crs # type: ignore + else: + crs = None + + # the input should be xarray.DataArray + if not isinstance(acc._obj, xr.core.dataarray.DataArray): + acc._obj = acc._obj.to_dataarray() + + # Get all the dimensions execpt x_coords, y_coords, they will be used to stack the dataarray later + arr_dims = tuple(dim for dim in acc._obj.dims if dim not in [x_coords, y_coords]) + + # Get the original information to use for unstacking the resulte later + coords_info = {name: geometry} + original_shape = [geometry.size] + for dim in arr_dims: + original_shape.append(acc._obj[dim].size) + coords_info[dim] = acc._obj[dim].values + + # Stack the other dimensions into one dimension called "location" + data = acc._obj.stack(location=arr_dims) + locs = data.location.size + + # Check the order of dimensions + data = data.transpose("location", y_coords, x_coords) + + # Aggregation result + stats = _prep_stats(stats) + results = exactextract.exact_extract( + rast=data, vec=gpd.GeoDataFrame(geometry), ops=stats, output="pandas" + ) + + # Unstack the results + agg = {} + i = 0 + for stat in stats: + df = results.iloc[:, i : i + locs] + + # Unstack the result + arr = df.values.reshape(original_shape) + result = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) + + agg[stat] = result + + i += locs + + vec_cube = xr.concat( + agg.values(), + dim=xr.DataArray( + list(agg.keys()), name="zonal_statistics", dims="zonal_statistics" + ), + ) + + return vec_cube + + +def _prep_stats(stats): + if isinstance(stats, str): + stats = [stats] + + prepared_stats = [] + for stat in stats: + if isinstance(stat, str): + prepared_stats.append(stat) + else: + raise ValueError( + f'{stat} is not supported. It supports strings (e.g., "mean", "quantile(q=0.25)")' + ) + return prepared_stats From 114e119d1f03a9ae4ce58ef6261f9f5bed2822e0 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Mon, 24 Jun 2024 15:31:44 +0200 Subject: [PATCH 02/16] try getting exactextract from PyPI dev0 --- ci/310.yaml | 4 +++- ci/311.yaml | 4 +++- ci/312.yaml | 4 +++- ci/39.yaml | 4 +++- ci/dev.yaml | 2 +- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ci/310.yaml b/ci/310.yaml index 6971a4f..5979c3b 100644 --- a/ci/310.yaml +++ b/ci/310.yaml @@ -11,7 +11,6 @@ dependencies: - rasterio - tqdm - pyproj - - exactextract # testing - pytest - pytest-cov @@ -20,4 +19,7 @@ dependencies: - geopandas-base - geodatasets - pyogrio + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/311.yaml b/ci/311.yaml index 361559a..1fe52c1 100644 --- a/ci/311.yaml +++ b/ci/311.yaml @@ -11,7 +11,6 @@ dependencies: - rasterio - tqdm - pyproj - - exactextract # testing - pytest - pytest-cov @@ -20,4 +19,7 @@ dependencies: - geopandas-base - geodatasets - pyogrio + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/312.yaml b/ci/312.yaml index 6820faf..24e8474 100644 --- a/ci/312.yaml +++ b/ci/312.yaml @@ -11,7 +11,6 @@ dependencies: - rasterio - tqdm - pyproj - - exactextract # testing - pytest - pytest-cov @@ -21,4 +20,7 @@ dependencies: - geodatasets - pyogrio - mypy + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/39.yaml b/ci/39.yaml index a0d3c47..7382d33 100644 --- a/ci/39.yaml +++ b/ci/39.yaml @@ -11,7 +11,6 @@ dependencies: - rasterio - tqdm - pyproj - - exactextract # testing - pytest - pytest-cov @@ -20,3 +19,6 @@ dependencies: - geopandas-base - geodatasets - pyogrio + - pip + - pip: + - exactextract==0.2.0.dev0 diff --git a/ci/dev.yaml b/ci/dev.yaml index 8c36ffe..57d4840 100644 --- a/ci/dev.yaml +++ b/ci/dev.yaml @@ -11,7 +11,6 @@ dependencies: - joblib - rasterio - tqdm - - exactextract # testing - pytest - pytest-cov @@ -25,3 +24,4 @@ dependencies: - git+https://github.com/shapely/shapely.git@main - git+https://github.com/pydata/xarray.git@main - git+https://github.com/pyproj4/pyproj.git + - exactextract==0.2.0.dev0 From 222be565dc9c4430abf66ee6c48bd9eed603b9c7 Mon Sep 17 00:00:00 2001 From: masawdah Date: Tue, 25 Jun 2024 11:05:17 +0200 Subject: [PATCH 03/16] fix errors --- xvec/zonal.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/xvec/zonal.py b/xvec/zonal.py index 487c978..abc5763 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -325,7 +325,7 @@ def _zonal_stats_exactextract( # Get the original information to use for unstacking the resulte later coords_info = {name: geometry} - original_shape = [geometry.size] + original_shape = [len(geometry)] for dim in arr_dims: original_shape.append(acc._obj[dim].size) coords_info[dim] = acc._obj[dim].values @@ -344,20 +344,18 @@ def _zonal_stats_exactextract( ) # Unstack the results - agg = {} - i = 0 - for stat in stats: - df = results.iloc[:, i : i + locs] - - # Unstack the result - arr = df.values.reshape(original_shape) - result = xr.DataArray( - arr, coords=coords_info, dims=coords_info.keys() - ).xvec.set_geom_indexes(name, crs=crs) - - agg[stat] = result - - i += locs + if pd.api.types.is_list_like(stats): + agg = {} + i = 0 + for stat in stats: + df = results.iloc[:, i : i + locs] + # Unstack the result + arr = df.values.reshape(original_shape) + result = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) + agg[stat] = result + i += locs vec_cube = xr.concat( agg.values(), From 8c8e7435de162021f505c3c5cdf97153ef7039b9 Mon Sep 17 00:00:00 2001 From: masawdah Date: Tue, 25 Jun 2024 11:13:53 +0200 Subject: [PATCH 04/16] item iterable --- xvec/zonal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xvec/zonal.py b/xvec/zonal.py index abc5763..cecd708 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -347,7 +347,7 @@ def _zonal_stats_exactextract( if pd.api.types.is_list_like(stats): agg = {} i = 0 - for stat in stats: + for stat in stats: # type: ignore df = results.iloc[:, i : i + locs] # Unstack the result arr = df.values.reshape(original_shape) From 720a07e780e20c38df01663ecdf1b4322c4d9827 Mon Sep 17 00:00:00 2001 From: masawdah Date: Tue, 25 Jun 2024 11:15:41 +0200 Subject: [PATCH 05/16] item iterable --- xvec/zonal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xvec/zonal.py b/xvec/zonal.py index cecd708..ee4eabe 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -347,7 +347,7 @@ def _zonal_stats_exactextract( if pd.api.types.is_list_like(stats): agg = {} i = 0 - for stat in stats: # type: ignore + for stat in stats: # type: ignore df = results.iloc[:, i : i + locs] # Unstack the result arr = df.values.reshape(original_shape) From e6f6ebd684b1f487f20646761ad0b591ddd1d744 Mon Sep 17 00:00:00 2001 From: masawdah Date: Thu, 27 Jun 2024 12:34:59 +0200 Subject: [PATCH 06/16] pytest --- xvec/tests/test_zonal_stats.py | 123 +++++++++++++++++++++------------ xvec/zonal.py | 42 +++++------ 2 files changed, 93 insertions(+), 72 deletions(-) diff --git a/xvec/tests/test_zonal_stats.py b/xvec/tests/test_zonal_stats.py index 42e415e..11b8cb8 100644 --- a/xvec/tests/test_zonal_stats.py +++ b/xvec/tests/test_zonal_stats.py @@ -9,7 +9,7 @@ import xvec # noqa: F401 -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_structure(method): da = xr.DataArray( np.ones((10, 10, 5)), @@ -24,14 +24,22 @@ def test_structure(method): polygon2 = shapely.geometry.Polygon([(6, 22), (9, 22), (9, 29), (6, 26)]) polygons = gpd.GeoSeries([polygon1, polygon2], crs="EPSG:4326") - expected = xr.DataArray( - np.array([[12.0] * 5, [18.0] * 5]), - coords={ - "geometry": polygons, - "time": pd.date_range("2023-01-01", periods=5), - }, - ).xvec.set_geom_indexes("geometry", crs="EPSG:4326") - + if method == "exactextract": + expected = xr.DataArray( + np.array([[12.0] * 5, [16.5] * 5]), + coords={ + "geometry": polygons, + "time": pd.date_range("2023-01-01", periods=5), + }, + ).xvec.set_geom_indexes("geometry", crs="EPSG:4326") + else: + expected = xr.DataArray( + np.array([[12.0] * 5, [18.0] * 5]), + coords={ + "geometry": polygons, + "time": pd.date_range("2023-01-01", periods=5), + }, + ).xvec.set_geom_indexes("geometry", crs="EPSG:4326") actual = da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) xr.testing.assert_identical(actual, expected) @@ -43,35 +51,36 @@ def test_structure(method): ) # dataset - ds = da.to_dataset(name="test") - - expected_ds = expected.to_dataset(name="test").set_coords("geometry") - actual_ds = ds.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) - xr.testing.assert_identical(actual_ds, expected_ds) - - actual_ix_ds = ds.xvec.zonal_stats( - polygons, "x", "y", stats="sum", method=method, index=True - ) - xr.testing.assert_identical( - actual_ix_ds, expected_ds.assign_coords({"index": ("geometry", polygons.index)}) - ) + if method == "rasterize" or method == "iterate": + ds = da.to_dataset(name="test") + expected_ds = expected.to_dataset(name="test").set_coords("geometry") + actual_ds = ds.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) + xr.testing.assert_identical(actual_ds, expected_ds) + + actual_ix_ds = ds.xvec.zonal_stats( + polygons, "x", "y", stats="sum", method=method, index=True + ) + xr.testing.assert_identical( + actual_ix_ds, + expected_ds.assign_coords({"index": ("geometry", polygons.index)}), + ) - # named index - polygons.index.name = "my_index" - actual_ix_named = da.xvec.zonal_stats( - polygons, "x", "y", stats="sum", method=method - ) - xr.testing.assert_identical( - actual_ix_named, - expected.assign_coords({"my_index": ("geometry", polygons.index)}), - ) - actual_ix_names_ds = ds.xvec.zonal_stats( - polygons, "x", "y", stats="sum", method=method - ) - xr.testing.assert_identical( - actual_ix_names_ds, - expected_ds.assign_coords({"my_index": ("geometry", polygons.index)}), - ) + # named index + polygons.index.name = "my_index" + actual_ix_named = da.xvec.zonal_stats( + polygons, "x", "y", stats="sum", method=method + ) + xr.testing.assert_identical( + actual_ix_named, + expected.assign_coords({"my_index": ("geometry", polygons.index)}), + ) + actual_ix_names_ds = ds.xvec.zonal_stats( + polygons, "x", "y", stats="sum", method=method + ) + xr.testing.assert_identical( + actual_ix_names_ds, + expected_ds.assign_coords({"my_index": ("geometry", polygons.index)}), + ) def test_match(): @@ -105,7 +114,7 @@ def test_dataset(method): ) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_dataarray(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) @@ -115,10 +124,13 @@ def test_dataarray(method): assert result.shape == (127, 2, 3) assert result.dims == ("geometry", "month", "level") - assert result.mean() == pytest.approx(61367.76185577) + if method == "exactextract": + assert result.mean() == pytest.approx(61625.53438858) + else: + assert result.mean() == pytest.approx(61367.76185577) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_stat(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) @@ -129,13 +141,32 @@ def test_stat(method): median_ = ds.z.xvec.zonal_stats( world.geometry, "longitude", "latitude", method=method, stats="median" ) - quantile_ = ds.z.xvec.zonal_stats( - world.geometry, "longitude", "latitude", method=method, stats="quantile", q=0.2 - ) + if method == "exactextract": + quantile_ = ds.z.xvec.zonal_stats( + world.geometry, + "longitude", + "latitude", + method=method, + stats="quantile(q=0.33)", + ) + else: + quantile_ = ds.z.xvec.zonal_stats( + world.geometry, + "longitude", + "latitude", + method=method, + stats="quantile", + q=0.2, + ) - assert mean_.mean() == pytest.approx(61367.76185577) - assert median_.mean() == pytest.approx(61370.18563539) - assert quantile_.mean() == pytest.approx(61279.93619836) + if method == "exactextract": + assert mean_.mean() == pytest.approx(61625.53438858) + assert median_.mean() == pytest.approx(61628.67168691) + assert quantile_.mean() == pytest.approx(61576.0883029) + else: + assert mean_.mean() == pytest.approx(61367.76185577) + assert median_.mean() == pytest.approx(61370.18563539) + assert quantile_.mean() == pytest.approx(61279.93619836) @pytest.mark.parametrize("method", ["rasterize", "iterate"]) diff --git a/xvec/zonal.py b/xvec/zonal.py index ee4eabe..c34930b 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -338,10 +338,8 @@ def _zonal_stats_exactextract( data = data.transpose("location", y_coords, x_coords) # Aggregation result - stats = _prep_stats(stats) - results = exactextract.exact_extract( - rast=data, vec=gpd.GeoDataFrame(geometry), ops=stats, output="pandas" - ) + gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs) + results = exactextract.exact_extract(rast=data, vec=gdf, ops=stats, output="pandas") # Unstack the results if pd.api.types.is_list_like(stats): @@ -356,27 +354,19 @@ def _zonal_stats_exactextract( ).xvec.set_geom_indexes(name, crs=crs) agg[stat] = result i += locs - - vec_cube = xr.concat( - agg.values(), - dim=xr.DataArray( - list(agg.keys()), name="zonal_statistics", dims="zonal_statistics" - ), - ) + vec_cube = xr.concat( + agg.values(), + dim=xr.DataArray( + list(agg.keys()), name="zonal_statistics", dims="zonal_statistics" + ), + ) + elif isinstance(stats, str): + # Unstack the result + arr = results.values.reshape(original_shape) + vec_cube = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) + else: + raise ValueError(f"{stats} is not a valid aggregation for exactextract method.") return vec_cube - - -def _prep_stats(stats): - if isinstance(stats, str): - stats = [stats] - - prepared_stats = [] - for stat in stats: - if isinstance(stat, str): - prepared_stats.append(stat) - else: - raise ValueError( - f'{stat} is not supported. It supports strings (e.g., "mean", "quantile(q=0.25)")' - ) - return prepared_stats From 64c57f5dc380e57fc703bb4d8d572488a80e8ee3 Mon Sep 17 00:00:00 2001 From: Mohammad Alasawedah <61426508+masawdah@users.noreply.github.com> Date: Thu, 27 Jun 2024 12:48:42 +0200 Subject: [PATCH 07/16] Update xvec/tests/test_zonal_stats.py it was a typo. I used same values Co-authored-by: Martin Fleischmann --- xvec/tests/test_zonal_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xvec/tests/test_zonal_stats.py b/xvec/tests/test_zonal_stats.py index 11b8cb8..e53cf5a 100644 --- a/xvec/tests/test_zonal_stats.py +++ b/xvec/tests/test_zonal_stats.py @@ -147,7 +147,7 @@ def test_stat(method): "longitude", "latitude", method=method, - stats="quantile(q=0.33)", + stats="quantile(q=0.2)", ) else: quantile_ = ds.z.xvec.zonal_stats( From 3f7b09f5b5742c4890f7af071bc4b5bb2ccbac0a Mon Sep 17 00:00:00 2001 From: masawdah Date: Thu, 27 Jun 2024 15:34:06 +0200 Subject: [PATCH 08/16] q20 --- xvec/tests/test_zonal_stats.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xvec/tests/test_zonal_stats.py b/xvec/tests/test_zonal_stats.py index 11b8cb8..a55d187 100644 --- a/xvec/tests/test_zonal_stats.py +++ b/xvec/tests/test_zonal_stats.py @@ -147,7 +147,7 @@ def test_stat(method): "longitude", "latitude", method=method, - stats="quantile(q=0.33)", + stats="quantile(q=0.2)", ) else: quantile_ = ds.z.xvec.zonal_stats( @@ -162,7 +162,7 @@ def test_stat(method): if method == "exactextract": assert mean_.mean() == pytest.approx(61625.53438858) assert median_.mean() == pytest.approx(61628.67168691) - assert quantile_.mean() == pytest.approx(61576.0883029) + assert quantile_.mean() == pytest.approx(61540.75632235) else: assert mean_.mean() == pytest.approx(61367.76185577) assert median_.mean() == pytest.approx(61370.18563539) From 806d0a829b4ffa3130da6b5735043f19542fc49c Mon Sep 17 00:00:00 2001 From: masawdah Date: Thu, 27 Jun 2024 18:15:10 +0200 Subject: [PATCH 09/16] doc --- doc/source/zonal_stats.ipynb | 6 +++--- xvec/accessor.py | 10 ++++++++-- xvec/zonal.py | 34 ++++++++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/doc/source/zonal_stats.ipynb b/doc/source/zonal_stats.ipynb index 9a0ac80..42426fc 100644 --- a/doc/source/zonal_stats.ipynb +++ b/doc/source/zonal_stats.ipynb @@ -5218,7 +5218,7 @@ ], "metadata": { "kernelspec": { - "display_name": "xvec", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -5232,9 +5232,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.11.9" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/xvec/accessor.py b/xvec/accessor.py index f2780af..69af615 100644 --- a/xvec/accessor.py +++ b/xvec/accessor.py @@ -962,7 +962,10 @@ def zonal_stats( available. Alternatively, you can pass a ``Callable`` supported by :meth:`~xarray.DataArray.reduce` or a list with ``strings``, ``callables`` or ``tuples`` in a ``(name, func, {kwargs})`` format, where ``func`` can be - a string or a callable. + a string or a callable. If the method is ``"exactextract"`` then the stats + should be string or list of strings that can be used + to construct :py:class:`Operation` objects supported + by :func:`exactextract.exact_extract` (e.g., ``"mean"``,``"quantile(q=0.20)"``) name : str, optional Name of the dimension that will hold the ``geometry``, by default "geometry" index : bool, optional @@ -978,7 +981,10 @@ def zonal_stats( information in case of small polygons or lines. Other option is ``"iterate"``, which iterates over geometries and uses :func:`rasterio.features.geometry_mask`. ``"iterate"`` method requires - ``joblib`` on top of ``rioxarray``. + ``joblib`` on top of ``rioxarray``. Other option is ``"exactextract"``, + which calculates precise stats by determining the fraction of each raster + cell that is covered by the polygon and uses + :func:`exactextract.exact_extract`. all_touched : bool, optional If True, all pixels touched by geometries will be considered. If False, only pixels whose center is within the polygon or that are selected by diff --git a/xvec/zonal.py b/xvec/zonal.py index c34930b..f09e42d 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -290,10 +290,40 @@ def _zonal_stats_exactextract( geometry: Sequence[shapely.Geometry], x_coords: Hashable, y_coords: Hashable, - stats: str | Callable | Sequence[str | Callable | tuple] = "mean", + stats: str | Sequence[str] = "mean", name: str = "geometry", - **kwargs, ) -> xr.DataArray: + """Extract the values from a dataset indexed by a set of geometries + + The CRS of the raster and that of geometry need to be equal. + Xvec does not verify their equality. + + Parameters + ---------- + geometry : Sequence[shapely.Geometry] + An arrray-like (1-D) of shapely geometries, like a numpy array or + :class:`geopandas.GeoSeries`. + x_coords : Hashable + name of the coordinates containing ``x`` coordinates (i.e. the first value + in the coordinate pair encoding the vertex of the polygon) + y_coords : Hashable + name of the coordinates containing ``y`` coordinates (i.e. the second value + in the coordinate pair encoding the vertex of the polygon) + stats : Hashable + Spatial aggregation statistic method, by default "mean". Any of the + strings that can be used to construct :py:class:`Operation` objects + supported by :func:`exactextract.exact_extract` (e.g., ``"mean"``, + ``"quantile(q=0.20)"``) + name : str, optional + Name of the dimension that will hold the ``geometry``, by default "geometry" + + Returns + ------- + DataArray + A subset of the original object with N-1 dimensions indexed by + the the GeometryIndex. + + """ try: import exactextract except ImportError as err: From c64e19a3a4df0b8254af4cb2142891a0e7349e21 Mon Sep 17 00:00:00 2001 From: masawdah Date: Fri, 28 Jun 2024 10:37:51 +0200 Subject: [PATCH 10/16] doc --- doc/source/zonal_stats.ipynb | 672 +++++++++++++++++++++++++++++++++++ xvec/accessor.py | 3 +- 2 files changed, 674 insertions(+), 1 deletion(-) diff --git a/doc/source/zonal_stats.ipynb b/doc/source/zonal_stats.ipynb index 42426fc..aba8617 100644 --- a/doc/source/zonal_stats.ipynb +++ b/doc/source/zonal_stats.ipynb @@ -4698,6 +4698,678 @@ "aggregated_iterative" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another option is to use method=\"exactextract\", which is using exactextract.exact_extract. It provides a fast and accurate statistcs by determining the fraction of each pixel that is covered by the polygon. On the other hand, the aggregation options limited to be string or list of strings (e.g., \"mean\",\"sum\"), the quantile option should be in this pattern quantile(q=0.20)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray (geometry: 127, variable: 3, month: 2, level: 3)> Size: 18kB\n",
+       "array([[[[ 1.10020109e+05,  5.02467617e+04,  1.18000068e+04],\n",
+       "         [ 1.04340867e+05,  4.81415742e+04,  1.09721250e+04]],\n",
+       "\n",
+       "        [[ 2.34193492e+00,  1.35657084e+00, -3.27713132e-01],\n",
+       "         [ 8.90285873e+00,  4.60525036e+00, -4.70965743e-01]],\n",
+       "\n",
+       "        [[ 4.43092197e-01,  2.10974604e-01,  4.24667656e-01],\n",
+       "         [ 3.05642295e+00,  2.40520573e+00,  1.84174263e+00]]],\n",
+       "\n",
+       "\n",
+       "       [[[ 1.10053125e+05,  5.00340469e+04,  1.15946436e+04],\n",
+       "         [ 1.03918828e+05,  4.75455391e+04,  1.03834092e+04]],\n",
+       "\n",
+       "        [[ 3.97441006e+00,  1.51359951e+00, -6.49608791e-01],\n",
+       "         [ 6.68128681e+00,  2.07990789e+00, -1.41006517e+00]],\n",
+       "\n",
+       "        [[-1.90807950e+00, -2.13966990e+00,  1.76289782e-01],\n",
+       "         [-4.23149252e+00, -3.24040627e+00, -1.06101977e-02]]],\n",
+       "\n",
+       "\n",
+       "...\n",
+       "\n",
+       "\n",
+       "       [[[ 1.06766375e+05,  4.93664219e+04,  1.24442344e+04],\n",
+       "         [ 1.14854156e+05,  5.36254414e+04,  1.36962217e+04]],\n",
+       "\n",
+       "        [[-4.01998013e-02, -1.99692643e+00, -1.63161933e+00],\n",
+       "         [ 8.49522471e-01,  1.41874683e+00,  5.10474503e-01]],\n",
+       "\n",
+       "        [[-3.72977763e-01,  7.79981986e-02, -1.80853570e+00],\n",
+       "         [-2.93938547e-01,  3.65662515e-01,  5.30625522e-01]]],\n",
+       "\n",
+       "\n",
+       "       [[[ 1.07647242e+05,  5.00465078e+04,  1.25338223e+04],\n",
+       "         [ 1.15183195e+05,  5.39953516e+04,  1.39444912e+04]],\n",
+       "\n",
+       "        [[ 7.12047434e+00,  3.38985515e+00,  3.39736611e-01],\n",
+       "         [ 2.80123234e+00,  2.39476514e+00,  8.82261574e-01]],\n",
+       "\n",
+       "        [[ 6.94990921e+00,  5.62016869e+00,  1.68521845e+00],\n",
+       "         [ 1.85186934e+00,  1.11626792e+00,  6.40948594e-01]]]])\n",
+       "Coordinates:\n",
+       "  * geometry  (geometry) object 1kB POLYGON ((-59.57209469261153 -80.04017872...\n",
+       "  * variable  (variable) object 24B 'z' 'u' 'v'\n",
+       "  * month     (month) int32 8B 1 7\n",
+       "  * level     (level) int32 12B 200 500 850\n",
+       "Indexes:\n",
+       "    geometry  GeometryIndex (crs=EPSG:4326)
" + ], + "text/plain": [ + " Size: 18kB\n", + "array([[[[ 1.10020109e+05, 5.02467617e+04, 1.18000068e+04],\n", + " [ 1.04340867e+05, 4.81415742e+04, 1.09721250e+04]],\n", + "\n", + " [[ 2.34193492e+00, 1.35657084e+00, -3.27713132e-01],\n", + " [ 8.90285873e+00, 4.60525036e+00, -4.70965743e-01]],\n", + "\n", + " [[ 4.43092197e-01, 2.10974604e-01, 4.24667656e-01],\n", + " [ 3.05642295e+00, 2.40520573e+00, 1.84174263e+00]]],\n", + "\n", + "\n", + " [[[ 1.10053125e+05, 5.00340469e+04, 1.15946436e+04],\n", + " [ 1.03918828e+05, 4.75455391e+04, 1.03834092e+04]],\n", + "\n", + " [[ 3.97441006e+00, 1.51359951e+00, -6.49608791e-01],\n", + " [ 6.68128681e+00, 2.07990789e+00, -1.41006517e+00]],\n", + "\n", + " [[-1.90807950e+00, -2.13966990e+00, 1.76289782e-01],\n", + " [-4.23149252e+00, -3.24040627e+00, -1.06101977e-02]]],\n", + "\n", + "\n", + "...\n", + "\n", + "\n", + " [[[ 1.06766375e+05, 4.93664219e+04, 1.24442344e+04],\n", + " [ 1.14854156e+05, 5.36254414e+04, 1.36962217e+04]],\n", + "\n", + " [[-4.01998013e-02, -1.99692643e+00, -1.63161933e+00],\n", + " [ 8.49522471e-01, 1.41874683e+00, 5.10474503e-01]],\n", + "\n", + " [[-3.72977763e-01, 7.79981986e-02, -1.80853570e+00],\n", + " [-2.93938547e-01, 3.65662515e-01, 5.30625522e-01]]],\n", + "\n", + "\n", + " [[[ 1.07647242e+05, 5.00465078e+04, 1.25338223e+04],\n", + " [ 1.15183195e+05, 5.39953516e+04, 1.39444912e+04]],\n", + "\n", + " [[ 7.12047434e+00, 3.38985515e+00, 3.39736611e-01],\n", + " [ 2.80123234e+00, 2.39476514e+00, 8.82261574e-01]],\n", + "\n", + " [[ 6.94990921e+00, 5.62016869e+00, 1.68521845e+00],\n", + " [ 1.85186934e+00, 1.11626792e+00, 6.40948594e-01]]]])\n", + "Coordinates:\n", + " * geometry (geometry) object 1kB POLYGON ((-59.57209469261153 -80.04017872...\n", + " * variable (variable) object 24B 'z' 'u' 'v'\n", + " * month (month) int32 8B 1 7\n", + " * level (level) int32 12B 200 500 850\n", + "Indexes:\n", + " geometry GeometryIndex (crs=EPSG:4326)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aggregated_iterative = ds.xvec.zonal_stats(\n", + " world.geometry,\n", + " x_coords=\"longitude\",\n", + " y_coords=\"latitude\",\n", + " method=\"exactextract\",\n", + ")\n", + "aggregated_iterative" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/xvec/accessor.py b/xvec/accessor.py index 69af615..5bae92f 100644 --- a/xvec/accessor.py +++ b/xvec/accessor.py @@ -988,7 +988,8 @@ def zonal_stats( all_touched : bool, optional If True, all pixels touched by geometries will be considered. If False, only pixels whose center is within the polygon or that are selected by - Bresenham’s line algorithm will be considered. + Bresenham’s line algorithm will be considered. Applies only if ``method="iterate"`` + or ``method="rasterize"``. n_jobs : int, optional Number of parallel threads to use. It is recommended to set this to the number of physical cores of the CPU. ``-1`` uses all available cores. From 5e0886ea9063df699e3b4d685c1a6e2966f9ecbe Mon Sep 17 00:00:00 2001 From: masawdah Date: Fri, 28 Jun 2024 10:46:05 +0200 Subject: [PATCH 11/16] expected type --- xvec/zonal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xvec/zonal.py b/xvec/zonal.py index f09e42d..ce26507 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -290,7 +290,7 @@ def _zonal_stats_exactextract( geometry: Sequence[shapely.Geometry], x_coords: Hashable, y_coords: Hashable, - stats: str | Sequence[str] = "mean", + stats: str | Callable | Sequence[str | Callable | tuple] = "mean", name: str = "geometry", ) -> xr.DataArray: """Extract the values from a dataset indexed by a set of geometries From b05ceb989e8754ce9479b3558de88a1a9d77c6ec Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Jun 2024 11:45:54 +0200 Subject: [PATCH 12/16] minor docstring change --- doc/source/conf.py | 1 + xvec/accessor.py | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index b266cc9..695a1c4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -45,6 +45,7 @@ "geopandas": ("https://geopandas.org/en/latest", None), "pandas": ("https://pandas.pydata.org/docs", None), "rasterio": ("https://rasterio.readthedocs.io/en/latest/", None), + "exactextract": ("https://isciences.github.io/exactextract/", None), } # -- Options for HTML output ------------------------------------------------- diff --git a/xvec/accessor.py b/xvec/accessor.py index 5bae92f..30e5f4d 100644 --- a/xvec/accessor.py +++ b/xvec/accessor.py @@ -954,18 +954,21 @@ def zonal_stats( name of the coordinates containing ``y`` coordinates (i.e. the second value in the coordinate pair encoding the vertex of the polygon) stats : string | Callable | Sequence[str | Callable | tuple] - Spatial aggregation statistic method, by default "mean". Any of the - aggregations available as :class:`xarray.DataArray` or - :class:`xarray.DataArrayGroupBy` methods like + Spatial aggregation statistic method, by default ``"mean"``. + + Any of the aggregations available as :class:`xarray.DataArray` or + :class:`~xarray.core.groupby.DataArrayGroupBy` methods like :meth:`~xarray.DataArray.mean`, :meth:`~xarray.DataArray.min`, :meth:`~xarray.DataArray.max`, or :meth:`~xarray.DataArray.quantile` are available. Alternatively, you can pass a ``Callable`` supported by :meth:`~xarray.DataArray.reduce` or a list with ``strings``, ``callables`` or ``tuples`` in a ``(name, func, {kwargs})`` format, where ``func`` can be - a string or a callable. If the method is ``"exactextract"`` then the stats - should be string or list of strings that can be used - to construct :py:class:`Operation` objects supported - by :func:`exactextract.exact_extract` (e.g., ``"mean"``,``"quantile(q=0.20)"``) + a string or a callable. + + If the method is ``"exactextract"`` then the stats should be string or list + of strings that can be used to construct :class:`exactextract.Operation` + objects supported by :func:`exactextract.exact_extract` (e.g., ``"mean"``, + ``"quantile(q=0.20)"``). name : str, optional Name of the dimension that will hold the ``geometry``, by default "geometry" index : bool, optional @@ -976,15 +979,23 @@ def zonal_stats( will never be stored. This is useful as an attribute link between the resulting array and the GeoPandas object from which the geometry is sourced. method : str, optional - The method of data extraction. The default is ``"rasterize"``, which uses - :func:`rasterio.features.rasterize` and is faster, but can lead to loss of - information in case of small polygons or lines. Other option is - ``"iterate"``, which iterates over geometries and uses - :func:`rasterio.features.geometry_mask`. ``"iterate"`` method requires - ``joblib`` on top of ``rioxarray``. Other option is ``"exactextract"``, - which calculates precise stats by determining the fraction of each raster - cell that is covered by the polygon and uses - :func:`exactextract.exact_extract`. + The method of data extraction. + + ``"rasterize"`` + uses :func:`rasterio.features.rasterize` and is faster, but can lead to + loss of information in case of small polygons or lines. + + ``"iterate"`` + iterates over geometries and uses + :func:`rasterio.features.geometry_mask`. Requires ``joblib`` on top of + ``rioxarray``. + + ``"exactextract"`` + calculates precise stats by determining the fraction of each raster cell + that is covered by the polygon and uses + :func:`exactextract.exact_extract`. + + The default is ``"rasterize"``. all_touched : bool, optional If True, all pixels touched by geometries will be considered. If False, only pixels whose center is within the polygon or that are selected by From d017df402f4050172abc9c71fa2a39bdea1190b0 Mon Sep 17 00:00:00 2001 From: Martin Fleischmann Date: Fri, 28 Jun 2024 11:52:20 +0200 Subject: [PATCH 13/16] minor message changes --- pyproject.toml | 5 +++-- xvec/zonal.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 38684a0..77de409 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,8 @@ exclude_lines = [ [tool.ruff] line-length = 88 -select = ["E", "F", "W", "I", "UP", "B", "A", "C4", "Q"] exclude = ["doc"] -target-version = "py39" + +[tool.ruff.lint] ignore = ['E501', 'Q000', 'Q001', 'Q002', 'Q003', 'W191'] +select = ["E", "F", "W", "I", "UP", "B", "A", "C4", "Q"] diff --git a/xvec/zonal.py b/xvec/zonal.py index ce26507..604d6a3 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -328,15 +328,16 @@ def _zonal_stats_exactextract( import exactextract except ImportError as err: raise ImportError( - "The exactextract package is required for `zonal_stats()`. " - "You can install it using or 'pip install exactextract'." + "The exactextract package is required for `zonal_stats()` with " + "method='exactextract'." ) from err try: import geopandas as gpd except ImportError as err: raise ImportError( - "The geopandas package is required for `xvec.to_geodataframe()`. " + "The geopandas package is required for `zonal_stats()` with " + "method='exactextract'. " "You can install it using 'conda install -c conda-forge geopandas' or " "'pip install geopandas'." ) from err @@ -350,7 +351,8 @@ def _zonal_stats_exactextract( if not isinstance(acc._obj, xr.core.dataarray.DataArray): acc._obj = acc._obj.to_dataarray() - # Get all the dimensions execpt x_coords, y_coords, they will be used to stack the dataarray later + # Get all the dimensions execpt x_coords, y_coords, they will be used to stack the + # dataarray later arr_dims = tuple(dim for dim in acc._obj.dims if dim not in [x_coords, y_coords]) # Get the original information to use for unstacking the resulte later @@ -397,6 +399,8 @@ def _zonal_stats_exactextract( arr, coords=coords_info, dims=coords_info.keys() ).xvec.set_geom_indexes(name, crs=crs) else: - raise ValueError(f"{stats} is not a valid aggregation for exactextract method.") + raise ValueError( + f"{stats} is not a valid aggregation for the exactextract method." + ) return vec_cube From ec28dbe252a9666667b1847a1c75796afcd9522c Mon Sep 17 00:00:00 2001 From: masawdah Date: Sat, 29 Jun 2024 14:20:17 +0200 Subject: [PATCH 14/16] i/o matches and pytests --- doc/source/zonal_stats.ipynb | 258 +++++++++++++++++---------------- xvec/tests/test_zonal_stats.py | 126 ++++++++++------ xvec/zonal.py | 194 ++++++++++++++++++------- 3 files changed, 358 insertions(+), 220 deletions(-) diff --git a/doc/source/zonal_stats.ipynb b/doc/source/zonal_stats.ipynb index aba8617..4f21f22 100644 --- a/doc/source/zonal_stats.ipynb +++ b/doc/source/zonal_stats.ipynb @@ -5076,95 +5076,18 @@ " stroke: currentColor;\n", " fill: currentColor;\n", "}\n", - "
<xarray.DataArray (geometry: 127, variable: 3, month: 2, level: 3)> Size: 18kB\n",
-       "array([[[[ 1.10020109e+05,  5.02467617e+04,  1.18000068e+04],\n",
-       "         [ 1.04340867e+05,  4.81415742e+04,  1.09721250e+04]],\n",
-       "\n",
-       "        [[ 2.34193492e+00,  1.35657084e+00, -3.27713132e-01],\n",
-       "         [ 8.90285873e+00,  4.60525036e+00, -4.70965743e-01]],\n",
-       "\n",
-       "        [[ 4.43092197e-01,  2.10974604e-01,  4.24667656e-01],\n",
-       "         [ 3.05642295e+00,  2.40520573e+00,  1.84174263e+00]]],\n",
-       "\n",
-       "\n",
-       "       [[[ 1.10053125e+05,  5.00340469e+04,  1.15946436e+04],\n",
-       "         [ 1.03918828e+05,  4.75455391e+04,  1.03834092e+04]],\n",
-       "\n",
-       "        [[ 3.97441006e+00,  1.51359951e+00, -6.49608791e-01],\n",
-       "         [ 6.68128681e+00,  2.07990789e+00, -1.41006517e+00]],\n",
-       "\n",
-       "        [[-1.90807950e+00, -2.13966990e+00,  1.76289782e-01],\n",
-       "         [-4.23149252e+00, -3.24040627e+00, -1.06101977e-02]]],\n",
-       "\n",
-       "\n",
-       "...\n",
-       "\n",
-       "\n",
-       "       [[[ 1.06766375e+05,  4.93664219e+04,  1.24442344e+04],\n",
-       "         [ 1.14854156e+05,  5.36254414e+04,  1.36962217e+04]],\n",
-       "\n",
-       "        [[-4.01998013e-02, -1.99692643e+00, -1.63161933e+00],\n",
-       "         [ 8.49522471e-01,  1.41874683e+00,  5.10474503e-01]],\n",
-       "\n",
-       "        [[-3.72977763e-01,  7.79981986e-02, -1.80853570e+00],\n",
-       "         [-2.93938547e-01,  3.65662515e-01,  5.30625522e-01]]],\n",
-       "\n",
-       "\n",
-       "       [[[ 1.07647242e+05,  5.00465078e+04,  1.25338223e+04],\n",
-       "         [ 1.15183195e+05,  5.39953516e+04,  1.39444912e+04]],\n",
-       "\n",
-       "        [[ 7.12047434e+00,  3.38985515e+00,  3.39736611e-01],\n",
-       "         [ 2.80123234e+00,  2.39476514e+00,  8.82261574e-01]],\n",
-       "\n",
-       "        [[ 6.94990921e+00,  5.62016869e+00,  1.68521845e+00],\n",
-       "         [ 1.85186934e+00,  1.11626792e+00,  6.40948594e-01]]]])\n",
+       "
<xarray.Dataset> Size: 19kB\n",
+       "Dimensions:   (geometry: 127, month: 2, level: 3)\n",
        "Coordinates:\n",
        "  * geometry  (geometry) object 1kB POLYGON ((-59.57209469261153 -80.04017872...\n",
-       "  * variable  (variable) object 24B 'z' 'u' 'v'\n",
        "  * month     (month) int32 8B 1 7\n",
        "  * level     (level) int32 12B 200 500 850\n",
+       "Data variables:\n",
+       "    z         (geometry, month, level) float64 6kB 1.1e+05 ... 1.394e+04\n",
+       "    u         (geometry, month, level) float64 6kB 2.342 1.357 ... 2.395 0.8823\n",
+       "    v         (geometry, month, level) float64 6kB 0.4431 0.211 ... 1.116 0.6409\n",
        "Indexes:\n",
-       "    geometry  GeometryIndex (crs=EPSG:4326)
  • month
    (month)
    int32
    1 7
    array([1, 7], dtype=int32)
  • level
    (level)
    int32
    200 500 850
    array([200, 500, 850], dtype=int32)
    • z
      (geometry, month, level)
      float64
      1.1e+05 5.025e+04 ... 1.394e+04
      array([[[110020.109375  ,  50246.76171875,  11800.00683594],\n",
      +       "        [104340.8671875 ,  48141.57421875,  10972.125     ]],\n",
              "\n",
      -       "        [[ 2.34193492e+00,  1.35657084e+00, -3.27713132e-01],\n",
      -       "         [ 8.90285873e+00,  4.60525036e+00, -4.70965743e-01]],\n",
      +       "       [[110053.125     ,  50034.046875  ,  11594.64355469],\n",
      +       "        [103918.828125  ,  47545.5390625 ,  10383.40917969]],\n",
              "\n",
      -       "        [[ 4.43092197e-01,  2.10974604e-01,  4.24667656e-01],\n",
      -       "         [ 3.05642295e+00,  2.40520573e+00,  1.84174263e+00]]],\n",
      +       "       [[110025.828125  ,  50273.4609375 ,  11804.21582031],\n",
      +       "        [104307.8515625 ,  48103.984375  ,  10926.046875  ]],\n",
      +       "\n",
      +       "       [[110479.078125  ,  50396.87890625,  11584.13671875],\n",
      +       "        [105361.328125  ,  48484.23046875,  10719.97363281]],\n",
      +       "\n",
      +       "       [[110513.9609375 ,  50396.921875  ,  11566.21191406],\n",
      +       "        [105312.8359375 ,  48421.27734375,  10649.96875   ]],\n",
      +       "\n",
      +       "       [[110415.2890625 ,  50324.73046875,  11450.27148438],\n",
      +       "        [105943.171875  ,  48915.36328125,  10910.06445312]],\n",
      +       "\n",
      +       "       [[110451.359375  ,  50507.41796875,  11628.30371094],\n",
      +       "        [106079.1796875 ,  49139.50390625,  11209.51171875]],\n",
      +       "...\n",
      +       "       [[107528.7890625 ,  50089.3671875 ,  12434.04003906],\n",
      +       "        [115396.2265625 ,  53958.40625   ,  13785.15429688]],\n",
              "\n",
      +       "       [[107312.2890625 ,  49961.9453125 ,  12440.63574219],\n",
      +       "        [115326.390625  ,  53867.3359375 ,  13746.83496094]],\n",
              "\n",
      -       "       [[[ 1.10053125e+05,  5.00340469e+04,  1.15946436e+04],\n",
      -       "         [ 1.03918828e+05,  4.75455391e+04,  1.03834092e+04]],\n",
      +       "       [[107058.1640625 ,  49860.69140625,  12479.18066406],\n",
      +       "        [115364.203125  ,  53842.44140625,  13706.61132812]],\n",
              "\n",
      -       "        [[ 3.97441006e+00,  1.51359951e+00, -6.49608791e-01],\n",
      -       "         [ 6.68128681e+00,  2.07990789e+00, -1.41006517e+00]],\n",
      +       "       [[106813.953125  ,  49799.734375  ,  12740.109375  ],\n",
      +       "        [115426.109375  ,  53817.6875    ,  13634.37402344]],\n",
              "\n",
      -       "        [[-1.90807950e+00, -2.13966990e+00,  1.76289782e-01],\n",
      -       "         [-4.23149252e+00, -3.24040627e+00, -1.06101977e-02]]],\n",
      +       "       [[106817.828125  ,  49363.7265625 ,  12472.28125   ],\n",
      +       "        [114880.6875    ,  53629.37890625,  13687.93554688]],\n",
              "\n",
      +       "       [[106766.375     ,  49366.421875  ,  12444.234375  ],\n",
      +       "        [114854.15625   ,  53625.44140625,  13696.22167969]],\n",
              "\n",
      +       "       [[107647.2421875 ,  50046.5078125 ,  12533.82226562],\n",
      +       "        [115183.1953125 ,  53995.3515625 ,  13944.49121094]]])
    • u
      (geometry, month, level)
      float64
      2.342 1.357 ... 2.395 0.8823
      array([[[ 2.34193492e+00,  1.35657084e+00, -3.27713132e-01],\n",
      +       "        [ 8.90285873e+00,  4.60525036e+00, -4.70965743e-01]],\n",
      +       "\n",
      +       "       [[ 3.97441006e+00,  1.51359951e+00, -6.49608791e-01],\n",
      +       "        [ 6.68128681e+00,  2.07990789e+00, -1.41006517e+00]],\n",
      +       "\n",
      +       "       [[ 2.56818128e+00,  1.79067111e+00, -3.20575655e-01],\n",
      +       "        [ 8.18799973e+00,  4.35199261e+00, -1.11983800e+00]],\n",
      +       "\n",
      +       "       [[ 4.53174543e+00,  5.20927548e-01, -4.72114563e+00],\n",
      +       "        [ 1.35154982e+01,  6.19246292e+00, -3.12192583e+00]],\n",
      +       "\n",
      +       "       [[ 4.96762800e+00,  8.81335557e-01, -3.80363226e+00],\n",
      +       "        [ 1.34584637e+01,  6.20137882e+00, -2.26881528e+00]],\n",
      +       "\n",
      +       "       [[ 3.43025708e+00, -6.76046126e-03, -3.22698545e+00],\n",
      +       "        [ 1.53262663e+01,  8.24850082e+00,  1.42534280e+00]],\n",
      +       "\n",
      +       "       [[ 4.05351067e+00,  1.83648229e+00, -3.97203296e-01],\n",
      +       "        [ 1.48342266e+01,  8.78393173e+00,  2.65177608e+00]],\n",
              "...\n",
      +       "       [[ 9.07420349e+00,  5.77803946e+00,  2.15175703e-01],\n",
      +       "        [ 5.28493357e+00,  4.85417986e+00,  2.12207770e+00]],\n",
      +       "\n",
      +       "       [[ 7.26792383e+00,  4.56520796e+00,  2.53050655e-01],\n",
      +       "        [ 5.36358070e+00,  5.04043388e+00,  2.41655588e+00]],\n",
      +       "\n",
      +       "       [[ 3.95153522e+00,  2.41996074e+00, -5.82089484e-01],\n",
      +       "        [ 6.19031620e+00,  5.77235317e+00,  2.67356610e+00]],\n",
      +       "\n",
      +       "       [[ 3.11530083e-01,  5.64418375e-01,  3.75999868e-01],\n",
      +       "        [ 6.57652712e+00,  5.32102251e+00,  1.92829692e+00]],\n",
      +       "\n",
      +       "       [[ 5.19281566e-01, -1.88856721e+00, -1.28003442e+00],\n",
      +       "        [ 1.36903751e+00,  1.48895967e+00,  3.69474202e-01]],\n",
              "\n",
      +       "       [[-4.01998013e-02, -1.99692643e+00, -1.63161933e+00],\n",
      +       "        [ 8.49522471e-01,  1.41874683e+00,  5.10474503e-01]],\n",
              "\n",
      -       "       [[[ 1.06766375e+05,  4.93664219e+04,  1.24442344e+04],\n",
      -       "         [ 1.14854156e+05,  5.36254414e+04,  1.36962217e+04]],\n",
      +       "       [[ 7.12047434e+00,  3.38985515e+00,  3.39736611e-01],\n",
      +       "        [ 2.80123234e+00,  2.39476514e+00,  8.82261574e-01]]])
    • v
      (geometry, month, level)
      float64
      0.4431 0.211 ... 1.116 0.6409
      array([[[ 4.43092197e-01,  2.10974604e-01,  4.24667656e-01],\n",
      +       "        [ 3.05642295e+00,  2.40520573e+00,  1.84174263e+00]],\n",
              "\n",
      -       "        [[-4.01998013e-02, -1.99692643e+00, -1.63161933e+00],\n",
      -       "         [ 8.49522471e-01,  1.41874683e+00,  5.10474503e-01]],\n",
      +       "       [[-1.90807950e+00, -2.13966990e+00,  1.76289782e-01],\n",
      +       "        [-4.23149252e+00, -3.24040627e+00, -1.06101977e-02]],\n",
              "\n",
      -       "        [[-3.72977763e-01,  7.79981986e-02, -1.80853570e+00],\n",
      -       "         [-2.93938547e-01,  3.65662515e-01,  5.30625522e-01]]],\n",
      +       "       [[ 5.48496842e-01, -4.10166383e-01, -3.15844744e-01],\n",
      +       "        [ 3.99047494e+00,  2.30697107e+00,  3.41612220e-01]],\n",
              "\n",
      +       "       [[ 1.39766645e+00,  1.79747254e-01,  2.12549314e-01],\n",
      +       "        [-3.53505230e+00, -3.34392905e+00, -1.93660402e+00]],\n",
              "\n",
      -       "       [[[ 1.07647242e+05,  5.00465078e+04,  1.25338223e+04],\n",
      -       "         [ 1.15183195e+05,  5.39953516e+04,  1.39444912e+04]],\n",
      +       "       [[ 1.19860590e+00, -1.65305912e-01, -3.74669969e-01],\n",
      +       "        [-3.77237749e+00, -3.46171308e+00, -2.43329430e+00]],\n",
              "\n",
      -       "        [[ 7.12047434e+00,  3.38985515e+00,  3.39736611e-01],\n",
      -       "         [ 2.80123234e+00,  2.39476514e+00,  8.82261574e-01]],\n",
      +       "       [[ 1.40259528e+00,  6.83697879e-01, -5.02627611e-01],\n",
      +       "        [-1.05609059e+00, -1.56556702e+00, -1.95661092e+00]],\n",
              "\n",
      -       "        [[ 6.94990921e+00,  5.62016869e+00,  1.68521845e+00],\n",
      -       "         [ 1.85186934e+00,  1.11626792e+00,  6.40948594e-01]]]])\n",
      +       "       [[-7.31962383e-01, -2.59934020e+00, -3.20754123e+00],\n",
      +       "        [ 2.75555968e+00,  3.63321155e-01, -3.71282434e+00]],\n",
      +       "...\n",
      +       "       [[-2.47861218e+00, -8.90987575e-01,  8.10488880e-01],\n",
      +       "        [ 2.44103074e+00,  1.43623161e+00,  6.64893866e-01]],\n",
      +       "\n",
      +       "       [[-2.65672231e+00, -9.47959900e-01,  3.34913373e-01],\n",
      +       "        [ 1.98191726e+00,  9.21227872e-01, -2.26546843e-02]],\n",
      +       "\n",
      +       "       [[-3.33670902e+00, -9.41135287e-01,  1.03623021e+00],\n",
      +       "        [ 6.39158905e-01, -5.63489273e-03, -6.24475300e-01]],\n",
      +       "\n",
      +       "       [[-4.83183265e-02,  5.21117091e-01,  2.55110049e+00],\n",
      +       "        [-2.42682129e-01, -7.05043674e-01, -5.86881876e-01]],\n",
      +       "\n",
      +       "       [[-3.65644240e+00, -2.89416933e+00, -2.24559069e+00],\n",
      +       "        [-1.49572217e+00, -6.49760664e-01,  7.08692014e-01]],\n",
      +       "\n",
      +       "       [[-3.72977763e-01,  7.79981986e-02, -1.80853570e+00],\n",
      +       "        [-2.93938547e-01,  3.65662515e-01,  5.30625522e-01]],\n",
      +       "\n",
      +       "       [[ 6.94990921e+00,  5.62016869e+00,  1.68521845e+00],\n",
      +       "        [ 1.85186934e+00,  1.11626792e+00,  6.40948594e-01]]])
    • month
      PandasIndex
      PandasIndex(Index([1, 7], dtype='int32', name='month'))
    • level
      PandasIndex
      PandasIndex(Index([200, 500, 850], dtype='int32', name='level'))
    • geometry
      GeometryIndex (crs=EPSG:4326)
      GeometryIndex(\n",
      +       "    [<POLYGON ((-59.572 -80.04, -59.866 -80.55, -60.16 -81, -62.255 -80.863, -64....>\n",
      +       "     <POLYGON ((-159.208 -79.497, -161.128 -79.634, -162.44 -79.281, -163.027 -78...>\n",
      +       "     <POLYGON ((-45.155 -78.047, -43.921 -78.478, -43.49 -79.086, -43.372 -79.517...>\n",
      +       "     <POLYGON ((-121.212 -73.501, -119.919 -73.658, -118.724 -73.481, -119.292 -7...>\n",
      +       "     ...\n",
      +       "     <POLYGON ((99.94 78.881, 97.758 78.756, 94.973 79.045, 93.313 79.427, 92.545...>\n",
      +       "     <POLYGON ((-87.02 79.66, -85.814 79.337, -87.188 79.039, -89.035 78.287, -90...>\n",
      +       "     <POLYGON ((-68.5 83.106, -65.827 83.028, -63.68 82.9, -61.85 82.629, -61.894...>\n",
      +       "     <POLYGON ((-27.1 83.52, -20.845 82.727, -22.692 82.342, -26.518 82.298, -31....>],\n",
      +       "    crs=EPSG:4326)
  • " + ], + "text/plain": [ + " Size: 19kB\n", + "Dimensions: (geometry: 127, month: 2, level: 3)\n", "Coordinates:\n", " * geometry (geometry) object 1kB POLYGON ((-59.57209469261153 -80.04017872...\n", - " * variable (variable) object 24B 'z' 'u' 'v'\n", " * month (month) int32 8B 1 7\n", " * level (level) int32 12B 200 500 850\n", + "Data variables:\n", + " z (geometry, month, level) float64 6kB 1.1e+05 ... 1.394e+04\n", + " u (geometry, month, level) float64 6kB 2.342 1.357 ... 2.395 0.8823\n", + " v (geometry, month, level) float64 6kB 0.4431 0.211 ... 1.116 0.6409\n", "Indexes:\n", " geometry GeometryIndex (crs=EPSG:4326)" ] @@ -5890,9 +5896,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "xvec_dev", "language": "python", - "name": "python3" + "name": "xvec_dev" }, "language_info": { "codemirror_mode": { @@ -5904,7 +5910,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/xvec/tests/test_zonal_stats.py b/xvec/tests/test_zonal_stats.py index a55d187..c46aaf3 100644 --- a/xvec/tests/test_zonal_stats.py +++ b/xvec/tests/test_zonal_stats.py @@ -96,22 +96,34 @@ def test_match(): xr.testing.assert_allclose(rasterize, iterate) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_dataset(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) result = ds.xvec.zonal_stats(world.geometry, "longitude", "latitude", method=method) - xr.testing.assert_allclose( - xr.Dataset( - { - "z": np.array(61367.76185577), - "u": np.array(4.19631497), - "v": np.array(-0.49170332), - } - ), - result.mean(), - ) + if method == "exactextract": + xr.testing.assert_allclose( + xr.Dataset( + { + "z": np.array(61625.53438858), + "u": np.array(4.15009377), + "v": np.array(-0.5161478), + } + ), + result.mean(), + ) + else: + xr.testing.assert_allclose( + xr.Dataset( + { + "z": np.array(61367.76185577), + "u": np.array(4.19631497), + "v": np.array(-0.49170332), + } + ), + result.mean(), + ) @pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) @@ -238,8 +250,15 @@ def test_crs(method): }, ).xvec.set_geom_indexes("geometry", crs=None) - actual = da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) - xr.testing.assert_identical(actual, expected) + if method == "exactextract": + with pytest.raises( + AttributeError, + match="Geometry input does not have a Coordinate Reference System", + ): + da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) + else: + actual = da.xvec.zonal_stats(polygons, "x", "y", stats="sum", method=method) + xr.testing.assert_identical(actual, expected) @pytest.mark.parametrize("method", ["rasterize", "iterate"]) @@ -268,40 +287,65 @@ def test_callable(method): xr.testing.assert_identical(da_agg, da_std) -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_multiple(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) - result = ds.xvec.zonal_stats( - world.geometry[:10].boundary, - "longitude", - "latitude", - stats=[ - "mean", - "sum", - ("quantile", "quantile", {"q": [0.1, 0.2, 0.3]}), - ("numpymean", np.nanmean), - np.nanmean, - ], - method=method, - n_jobs=1, - ) - assert sorted(result.dims) == sorted( - [ - "level", - "zonal_statistics", - "geometry", - "month", - "quantile", - ] - ) + if method == "exactextract": + result = ds.xvec.zonal_stats( + world.geometry[:10].boundary, + "longitude", + "latitude", + stats=[ + "mean", + "sum", + "quantile(q=0.20)", + ], + method=method, + n_jobs=1, + ) + assert sorted(result.dims) == sorted( + [ + "level", + "zonal_statistics", + "geometry", + "month", + ] + ) + + assert (result.zonal_statistics == ["mean", "sum", "quantile(q=0.20)"]).all() + else: + result = ds.xvec.zonal_stats( + world.geometry[:10].boundary, + "longitude", + "latitude", + stats=[ + "mean", + "sum", + ("quantile", "quantile", {"q": [0.1, 0.2, 0.3]}), + ("numpymean", np.nanmean), + np.nanmean, + ], + method=method, + n_jobs=1, + ) + assert sorted(result.dims) == sorted( + [ + "level", + "zonal_statistics", + "geometry", + "month", + "quantile", + ] + ) - assert ( - result.zonal_statistics == ["mean", "sum", "quantile", "numpymean", "nanmean"] - ).all() + assert ( + result.zonal_statistics + == ["mean", "sum", "quantile", "numpymean", "nanmean"] + ).all() -@pytest.mark.parametrize("method", ["rasterize", "iterate"]) +@pytest.mark.parametrize("method", ["rasterize", "iterate", "exactextract"]) def test_invalid(method): ds = xr.tutorial.open_dataset("eraint_uvz") world = gpd.read_file(geodatasets.get_path("naturalearth land")) diff --git a/xvec/zonal.py b/xvec/zonal.py index 604d6a3..d8ed2ca 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -292,7 +292,7 @@ def _zonal_stats_exactextract( y_coords: Hashable, stats: str | Callable | Sequence[str | Callable | tuple] = "mean", name: str = "geometry", -) -> xr.DataArray: +) -> xr.DataArray | xr.Dataset: """Extract the values from a dataset indexed by a set of geometries The CRS of the raster and that of geometry need to be equal. @@ -319,71 +319,53 @@ def _zonal_stats_exactextract( Returns ------- - DataArray + Dataset | DataArray A subset of the original object with N-1 dimensions indexed by the the GeometryIndex. """ - try: - import exactextract - except ImportError as err: - raise ImportError( - "The exactextract package is required for `zonal_stats()` with " - "method='exactextract'." - ) from err - - try: - import geopandas as gpd - except ImportError as err: - raise ImportError( - "The geopandas package is required for `zonal_stats()` with " - "method='exactextract'. " - "You can install it using 'conda install -c conda-forge geopandas' or " - "'pip install geopandas'." - ) from err - if hasattr(geometry, "crs"): crs = geometry.crs # type: ignore else: - crs = None + raise AttributeError( + "Geometry input does not have a Coordinate Reference System (CRS)." + ) # the input should be xarray.DataArray + original_is_ds = False if not isinstance(acc._obj, xr.core.dataarray.DataArray): + original_ds = acc._obj acc._obj = acc._obj.to_dataarray() - - # Get all the dimensions execpt x_coords, y_coords, they will be used to stack the - # dataarray later - arr_dims = tuple(dim for dim in acc._obj.dims if dim not in [x_coords, y_coords]) - - # Get the original information to use for unstacking the resulte later - coords_info = {name: geometry} - original_shape = [len(geometry)] - for dim in arr_dims: - original_shape.append(acc._obj[dim].size) - coords_info[dim] = acc._obj[dim].values - - # Stack the other dimensions into one dimension called "location" - data = acc._obj.stack(location=arr_dims) - locs = data.location.size - - # Check the order of dimensions - data = data.transpose("location", y_coords, x_coords) - - # Aggregation result - gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs) - results = exactextract.exact_extract(rast=data, vec=gdf, ops=stats, output="pandas") + original_is_ds = True # Unstack the results + agg = {} if pd.api.types.is_list_like(stats): - agg = {} + for stat in stats: + if not isinstance(stat, str): + raise ValueError(f"{stat} is not a valid aggregation.") + + results, original_shape, coords_info, locs = _agg_exactextract( + acc, geometry, crs, x_coords, y_coords, stats, name, original_is_ds + ) i = 0 for stat in stats: # type: ignore df = results.iloc[:, i : i + locs] # Unstack the result arr = df.values.reshape(original_shape) - result = xr.DataArray( - arr, coords=coords_info, dims=coords_info.keys() - ).xvec.set_geom_indexes(name, crs=crs) + if original_is_ds is True: + data_vars = {} + for idx, data_var in enumerate(acc._obj["variable"].values): + data_vars[data_var] = (coords_info.keys(), arr[:, idx]) + result = xr.Dataset( + data_vars=data_vars, + coords=coords_info, + ).xvec.set_geom_indexes(name, crs=crs) + else: + result = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) + agg[stat] = result i += locs vec_cube = xr.concat( @@ -393,14 +375,120 @@ def _zonal_stats_exactextract( ), ) elif isinstance(stats, str): + results, original_shape, coords_info, _ = _agg_exactextract( + acc, geometry, crs, x_coords, y_coords, stats, name, original_is_ds + ) # Unstack the result arr = results.values.reshape(original_shape) - vec_cube = xr.DataArray( - arr, coords=coords_info, dims=coords_info.keys() - ).xvec.set_geom_indexes(name, crs=crs) + if original_is_ds is True: + data_vars = {} + for idx, data_var in enumerate(acc._obj["variable"].values): + data_vars[data_var] = (coords_info.keys(), arr[:, idx]) + vec_cube = xr.Dataset( + data_vars=data_vars, + coords=coords_info, + ).xvec.set_geom_indexes(name, crs=crs) + else: + vec_cube = xr.DataArray( + arr, coords=coords_info, dims=coords_info.keys() + ).xvec.set_geom_indexes(name, crs=crs) else: - raise ValueError( - f"{stats} is not a valid aggregation for the exactextract method." - ) + raise ValueError(f"{stats} is not a valid aggregation.") + + if original_is_ds: + acc._obj = original_ds return vec_cube + + +def _agg_exactextract( + acc, + geometry, + crs, + x_coords: str | None = None, + y_coords: str | None = None, + stats: str | Callable | Iterable[str | Callable | tuple] = "mean", + name: str = "geometry", + original_is_ds: bool = False, +): + """Extract the values from a dataset indexed by a set of geometries + + The CRS of the raster and that of geometry need to be equal. + Xvec does not verify their equality. + + Parameters + ---------- + geometry : Sequence[shapely.Geometry] + An arrray-like (1-D) of shapely geometries, like a numpy array or + :class:`geopandas.GeoSeries`. + crs : Coordinate Reference System of the geometry objects. + x_coords : Hashable + name of the coordinates containing ``x`` coordinates (i.e. the first value + in the coordinate pair encoding the vertex of the polygon) + y_coords : Hashable + name of the coordinates containing ``y`` coordinates (i.e. the second value + in the coordinate pair encoding the vertex of the polygon) + stats : Hashable + Spatial aggregation statistic method, by default "mean". Any of the + strings that can be used to construct :py:class:`Operation` objects + supported by :func:`exactextract.exact_extract` (e.g., ``"mean"``, + ``"quantile(q=0.20)"``) + name : str, optional + Name of the dimension that will hold the ``geometry``, by default "geometry" + original_is_ds : bool + If True, all pixels touched by geometries will be considered. If False, only + pixels whose center is within the polygon or that are selected by + Bresenham’s line algorithm will be considered. + + Returns + ------- + pandas.DataFrame + Aggregated values over the geometry. + + """ + try: + import exactextract + except ImportError as err: + raise ImportError( + "The exactextract package is required for `zonal_stats()` with " + "method='exactextract'." + ) from err + try: + import geopandas as gpd + except ImportError as err: + raise ImportError( + "The geopandas package is required for `zonal_stats()` with " + "method='exactextract'. " + "You can install it using 'conda install -c conda-forge geopandas' or " + "'pip install geopandas'." + ) from err + + # Stack the other dimensions into one dimension called "location" + arr_dims = tuple(dim for dim in acc._obj.dims if dim not in [x_coords, y_coords]) + data = acc._obj.stack(location=arr_dims) + locs = data.location.size + + # Check the order of dimensions + data = data.transpose("location", y_coords, x_coords) + + # Aggregation result + gdf = gpd.GeoDataFrame(geometry=geometry, crs=crs) + results = exactextract.exact_extract(rast=data, vec=gdf, ops=stats, output="pandas") + # Get all the dimensions execpt x_coords, y_coords, they will be used to stack the + # dataarray later + if original_is_ds is True: + # Get the original dataset information to use for unstacking the resulte later + coords_info = {name: geometry} + original_shape = [len(geometry)] + for dim in arr_dims: + original_shape.append(acc._obj[dim].size) + if dim != "variable": + coords_info[dim] = acc._obj[dim].values + else: + # Get the original dataarray information to use for unstacking the resulte later + coords_info = {name: geometry} + original_shape = [len(geometry)] + for dim in arr_dims: + original_shape.append(acc._obj[dim].size) + coords_info[dim] = acc._obj[dim].values + return results, original_shape, coords_info, locs From 09919e4abbb5c1e17210d136ca7679413f345b12 Mon Sep 17 00:00:00 2001 From: masawdah Date: Sat, 29 Jun 2024 14:30:11 +0200 Subject: [PATCH 15/16] args types --- xvec/zonal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xvec/zonal.py b/xvec/zonal.py index d8ed2ca..7538504 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -341,7 +341,7 @@ def _zonal_stats_exactextract( # Unstack the results agg = {} if pd.api.types.is_list_like(stats): - for stat in stats: + for stat in stats: # type: ignore if not isinstance(stat, str): raise ValueError(f"{stat} is not a valid aggregation.") @@ -405,8 +405,8 @@ def _agg_exactextract( acc, geometry, crs, - x_coords: str | None = None, - y_coords: str | None = None, + x_coords: Hashable, + y_coords: Hashable, stats: str | Callable | Iterable[str | Callable | tuple] = "mean", name: str = "geometry", original_is_ds: bool = False, From 71767ce21f03aeefa0f682325ae671260f2f4515 Mon Sep 17 00:00:00 2001 From: masawdah Date: Tue, 2 Jul 2024 15:38:44 +0200 Subject: [PATCH 16/16] pass attrs --- doc/source/zonal_stats.ipynb | 24 +++++++++++++++--------- xvec/zonal.py | 2 ++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/doc/source/zonal_stats.ipynb b/doc/source/zonal_stats.ipynb index 4f21f22..12d77b0 100644 --- a/doc/source/zonal_stats.ipynb +++ b/doc/source/zonal_stats.ipynb @@ -4707,7 +4707,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -5087,7 +5087,10 @@ " u (geometry, month, level) float64 6kB 2.342 1.357 ... 2.395 0.8823\n", " v (geometry, month, level) float64 6kB 0.4431 0.211 ... 1.116 0.6409\n", "Indexes:\n", - " geometry GeometryIndex (crs=EPSG:4326)
  • Conventions :
    CF-1.0
    Info :
    Monthly ERA-Interim data. Downloaded and edited by fabien.maussion@uibk.ac.at
  • " ], "text/plain": [ " Size: 19kB\n", @@ -5358,10 +5361,13 @@ " u (geometry, month, level) float64 6kB 2.342 1.357 ... 2.395 0.8823\n", " v (geometry, month, level) float64 6kB 0.4431 0.211 ... 1.116 0.6409\n", "Indexes:\n", - " geometry GeometryIndex (crs=EPSG:4326)" + " geometry GeometryIndex (crs=EPSG:4326)\n", + "Attributes:\n", + " Conventions: CF-1.0\n", + " Info: Monthly ERA-Interim data. Downloaded and edited by fabien.m..." ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } diff --git a/xvec/zonal.py b/xvec/zonal.py index 7538504..060eff1 100644 --- a/xvec/zonal.py +++ b/xvec/zonal.py @@ -395,6 +395,8 @@ def _zonal_stats_exactextract( else: raise ValueError(f"{stats} is not a valid aggregation.") + vec_cube.attrs = acc._obj.attrs + if original_is_ds: acc._obj = original_ds