Skip to content

Commit 66708b1

Browse files
committed
re #5 adding docstrings
1 parent a71c00f commit 66708b1

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed

cut_protected_areas_by_county.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
"""Cut public lands polygons by county boundaries.
2+
3+
This script prepares two vector layers (public lands and counties) and produces a
4+
new vector layer where public lands geometries are intersected with county
5+
polygons. Public lands features fully contained by a county remain unchanged,
6+
while features that cross county boundaries are split into multiple features.
7+
8+
Processing is organized as a task graph with intermediate artifacts written to
9+
`INTERMEDIATE_DIR`. The final cut dataset is written to `OUT_DIR`.
10+
11+
Typical pipeline:
12+
1. Fix invalid geometries in both layers.
13+
2. Reproject counties to the public lands CRS (work CRS).
14+
3. Simplify public lands geometries.
15+
4. Drop null and zero-area geometries.
16+
5. Spatially prefilter intersecting features (sjoin) and cut via overlay
17+
intersection.
18+
19+
Outputs are written as GeoPackage layers when the destination path ends in
20+
`.gpkg`; otherwise, the driver is inferred by GeoPandas.
21+
"""
22+
123
from pathlib import Path
224
import logging
325

@@ -26,14 +48,46 @@
2648

2749

2850
def _layer_from_path(path: str) -> str:
51+
"""Derive a default layer name from a filesystem path.
52+
53+
For a path like `/some/dir/foo.gpkg`, this returns `foo`.
54+
55+
Args:
56+
path: Path to a dataset file.
57+
58+
Returns:
59+
The filename stem (final path component without extension).
60+
"""
2961
return Path(path).stem
3062

3163

3264
def _read(path: str) -> gpd.GeoDataFrame:
65+
"""Read a vector dataset into a GeoDataFrame.
66+
67+
Args:
68+
path: Path to a vector dataset readable by GeoPandas.
69+
70+
Returns:
71+
A GeoDataFrame containing the dataset's features and geometry column.
72+
"""
3373
return gpd.read_file(path)
3474

3575

3676
def _write(gdf: gpd.GeoDataFrame, path: str, layer: str | None = None) -> None:
77+
"""Write a GeoDataFrame to disk.
78+
79+
If `path` ends with `.gpkg`, the output is written as a GeoPackage and the
80+
layer name is set to `layer` or (if unset) the stem of `path`. Otherwise,
81+
GeoPandas infers the driver from the file extension.
82+
83+
Args:
84+
gdf: GeoDataFrame to write.
85+
path: Destination path.
86+
layer: Optional layer name (used for GeoPackage outputs only).
87+
88+
Returns:
89+
None
90+
"""
3791
p = Path(path)
3892
p.parent.mkdir(parents=True, exist_ok=True)
3993
if p.suffix.lower() == ".gpkg":
@@ -43,12 +97,38 @@ def _write(gdf: gpd.GeoDataFrame, path: str, layer: str | None = None) -> None:
4397

4498

4599
def fix_geometries(in_path: str, out_path: str, layer: str) -> None:
100+
"""Fix invalid geometries and write the result to a new dataset.
101+
102+
This uses `shapely.make_valid` to repair invalid geometries (e.g. self-
103+
intersections). The operation is applied to every feature geometry and the
104+
result is written to `out_path`.
105+
106+
Args:
107+
in_path: Input vector dataset path.
108+
out_path: Output vector dataset path.
109+
layer: Output layer name (for GeoPackage outputs).
110+
111+
Returns:
112+
None
113+
"""
46114
gdf = _read(in_path)
47115
gdf.geometry = make_valid(gdf.geometry)
48116
_write(gdf, out_path, layer=layer)
49117

50118

51119
def reproject(in_path: str, out_path: str, target_crs, layer: str) -> None:
120+
"""Reproject a vector dataset to a target CRS and write the result.
121+
122+
Args:
123+
in_path: Input vector dataset path.
124+
out_path: Output vector dataset path.
125+
target_crs: CRS passed to GeoPandas `to_crs` (e.g. EPSG int/string or
126+
CRS object).
127+
layer: Output layer name (for GeoPackage outputs).
128+
129+
Returns:
130+
None
131+
"""
52132
gdf = _read(in_path)
53133
gdf = gdf.to_crs(target_crs)
54134
_write(gdf, out_path, layer=layer)
@@ -57,12 +137,38 @@ def reproject(in_path: str, out_path: str, target_crs, layer: str) -> None:
57137
def simplify(
58138
in_path: str, out_path: str, tolerance: float, layer: str | None = None
59139
) -> None:
140+
"""Simplify geometries and write the result.
141+
142+
Uses Shapely geometry simplification with `preserve_topology=True`.
143+
144+
Args:
145+
in_path: Input vector dataset path.
146+
out_path: Output vector dataset path.
147+
tolerance: Simplification tolerance in the dataset's coordinate units.
148+
layer: Optional output layer name (for GeoPackage outputs).
149+
150+
Returns:
151+
None
152+
"""
60153
gdf = _read(in_path)
61154
gdf.geometry = gdf.geometry.simplify(tolerance, preserve_topology=True)
62155
_write(gdf, out_path, layer=layer)
63156

64157

65158
def drop_zero_area(in_path: str, out_path: str, layer: str | None = None) -> None:
159+
"""Drop null and zero-area geometries and write the result.
160+
161+
Note that `.area` is computed in the layer CRS; for meaningful area filtering
162+
this should be a projected CRS.
163+
164+
Args:
165+
in_path: Input vector dataset path.
166+
out_path: Output vector dataset path.
167+
layer: Optional output layer name (for GeoPackage outputs).
168+
169+
Returns:
170+
None
171+
"""
66172
gdf = _read(in_path)
67173
gdf = gdf[gdf.geometry.notna() & (gdf.geometry.area > 0)].copy()
68174
_write(gdf, out_path, layer=layer)
@@ -75,6 +181,32 @@ def cut_public_lands_by_counties(
75181
out_layer: str = "public_lands_cut",
76182
county_keep_cols: list[str] | None = None,
77183
) -> None:
184+
"""Cut public lands polygons by county boundaries.
185+
186+
The cut operation is implemented as an overlay intersection between public
187+
lands and county polygons. This yields:
188+
- One output feature for each public-lands portion within a county.
189+
- Public-lands features fully contained by a county remain geometrically
190+
unchanged (aside from possible vertex ordering).
191+
- Public-lands features that cross county boundaries are split into
192+
multiple output features, one per intersected county.
193+
194+
Prior to overlay, a spatial join is used to restrict processing to
195+
intersecting features only.
196+
197+
Args:
198+
public_lands_path: Path to the prepared public lands dataset.
199+
counties_path: Path to the prepared counties dataset (must be in the same
200+
CRS as `public_lands_path`).
201+
out_path: Destination path for the cut output dataset.
202+
out_layer: Layer name for GeoPackage outputs.
203+
county_keep_cols: Optional list of county columns to retain in the output
204+
(in addition to geometry). Columns not present in `counties` are
205+
ignored. If omitted, only county geometry is used.
206+
207+
Returns:
208+
None
209+
"""
78210
public_lands = _read(public_lands_path).reset_index(drop=True)
79211
counties = _read(counties_path).reset_index(drop=True)
80212

@@ -111,6 +243,21 @@ def cut_public_lands_by_counties(
111243

112244

113245
def main():
246+
"""Run the end-to-end cutting workflow using a TaskGraph pipeline.
247+
248+
This function:
249+
- Creates output and intermediate directories.
250+
- Creates a TaskGraph to manage intermediate steps and dependencies.
251+
- Fixes geometries for both layers.
252+
- Reprojects counties to the public lands CRS.
253+
- Simplifies public lands geometries.
254+
- Drops null/zero-area geometries for both layers.
255+
- Cuts public lands by counties via overlay intersection and writes the
256+
final GeoPackage.
257+
258+
Returns:
259+
None
260+
"""
114261
out_dir = Path(OUT_DIR)
115262
out_dir.mkdir(parents=True, exist_ok=True)
116263
intermediate_dir = Path(INTERMEDIATE_DIR)

0 commit comments

Comments
 (0)