1+ """Cut public lands polygons by county boundaries.
2+
3+ This script prepares two vector layers (public lands and counties) and produces a
4+ new vector layer where public lands geometries are intersected with county
5+ polygons. Public lands features fully contained by a county remain unchanged,
6+ while features that cross county boundaries are split into multiple features.
7+
8+ Processing is organized as a task graph with intermediate artifacts written to
9+ `INTERMEDIATE_DIR`. The final cut dataset is written to `OUT_DIR`.
10+
11+ Typical pipeline:
12+ 1. Fix invalid geometries in both layers.
13+ 2. Reproject counties to the public lands CRS (work CRS).
14+ 3. Simplify public lands geometries.
15+ 4. Drop null and zero-area geometries.
16+ 5. Spatially prefilter intersecting features (sjoin) and cut via overlay
17+ intersection.
18+
19+ Outputs are written as GeoPackage layers when the destination path ends in
20+ `.gpkg`; otherwise, the driver is inferred by GeoPandas.
21+ """
22+
123from pathlib import Path
224import logging
325
2648
2749
2850def _layer_from_path (path : str ) -> str :
51+ """Derive a default layer name from a filesystem path.
52+
53+ For a path like `/some/dir/foo.gpkg`, this returns `foo`.
54+
55+ Args:
56+ path: Path to a dataset file.
57+
58+ Returns:
59+ The filename stem (final path component without extension).
60+ """
2961 return Path (path ).stem
3062
3163
3264def _read (path : str ) -> gpd .GeoDataFrame :
65+ """Read a vector dataset into a GeoDataFrame.
66+
67+ Args:
68+ path: Path to a vector dataset readable by GeoPandas.
69+
70+ Returns:
71+ A GeoDataFrame containing the dataset's features and geometry column.
72+ """
3373 return gpd .read_file (path )
3474
3575
3676def _write (gdf : gpd .GeoDataFrame , path : str , layer : str | None = None ) -> None :
77+ """Write a GeoDataFrame to disk.
78+
79+ If `path` ends with `.gpkg`, the output is written as a GeoPackage and the
80+ layer name is set to `layer` or (if unset) the stem of `path`. Otherwise,
81+ GeoPandas infers the driver from the file extension.
82+
83+ Args:
84+ gdf: GeoDataFrame to write.
85+ path: Destination path.
86+ layer: Optional layer name (used for GeoPackage outputs only).
87+
88+ Returns:
89+ None
90+ """
3791 p = Path (path )
3892 p .parent .mkdir (parents = True , exist_ok = True )
3993 if p .suffix .lower () == ".gpkg" :
@@ -43,12 +97,38 @@ def _write(gdf: gpd.GeoDataFrame, path: str, layer: str | None = None) -> None:
4397
4498
4599def fix_geometries (in_path : str , out_path : str , layer : str ) -> None :
100+ """Fix invalid geometries and write the result to a new dataset.
101+
102+ This uses `shapely.make_valid` to repair invalid geometries (e.g. self-
103+ intersections). The operation is applied to every feature geometry and the
104+ result is written to `out_path`.
105+
106+ Args:
107+ in_path: Input vector dataset path.
108+ out_path: Output vector dataset path.
109+ layer: Output layer name (for GeoPackage outputs).
110+
111+ Returns:
112+ None
113+ """
46114 gdf = _read (in_path )
47115 gdf .geometry = make_valid (gdf .geometry )
48116 _write (gdf , out_path , layer = layer )
49117
50118
51119def reproject (in_path : str , out_path : str , target_crs , layer : str ) -> None :
120+ """Reproject a vector dataset to a target CRS and write the result.
121+
122+ Args:
123+ in_path: Input vector dataset path.
124+ out_path: Output vector dataset path.
125+ target_crs: CRS passed to GeoPandas `to_crs` (e.g. EPSG int/string or
126+ CRS object).
127+ layer: Output layer name (for GeoPackage outputs).
128+
129+ Returns:
130+ None
131+ """
52132 gdf = _read (in_path )
53133 gdf = gdf .to_crs (target_crs )
54134 _write (gdf , out_path , layer = layer )
@@ -57,12 +137,38 @@ def reproject(in_path: str, out_path: str, target_crs, layer: str) -> None:
57137def simplify (
58138 in_path : str , out_path : str , tolerance : float , layer : str | None = None
59139) -> None :
140+ """Simplify geometries and write the result.
141+
142+ Uses Shapely geometry simplification with `preserve_topology=True`.
143+
144+ Args:
145+ in_path: Input vector dataset path.
146+ out_path: Output vector dataset path.
147+ tolerance: Simplification tolerance in the dataset's coordinate units.
148+ layer: Optional output layer name (for GeoPackage outputs).
149+
150+ Returns:
151+ None
152+ """
60153 gdf = _read (in_path )
61154 gdf .geometry = gdf .geometry .simplify (tolerance , preserve_topology = True )
62155 _write (gdf , out_path , layer = layer )
63156
64157
65158def drop_zero_area (in_path : str , out_path : str , layer : str | None = None ) -> None :
159+ """Drop null and zero-area geometries and write the result.
160+
161+ Note that `.area` is computed in the layer CRS; for meaningful area filtering
162+ this should be a projected CRS.
163+
164+ Args:
165+ in_path: Input vector dataset path.
166+ out_path: Output vector dataset path.
167+ layer: Optional output layer name (for GeoPackage outputs).
168+
169+ Returns:
170+ None
171+ """
66172 gdf = _read (in_path )
67173 gdf = gdf [gdf .geometry .notna () & (gdf .geometry .area > 0 )].copy ()
68174 _write (gdf , out_path , layer = layer )
@@ -75,6 +181,32 @@ def cut_public_lands_by_counties(
75181 out_layer : str = "public_lands_cut" ,
76182 county_keep_cols : list [str ] | None = None ,
77183) -> None :
184+ """Cut public lands polygons by county boundaries.
185+
186+ The cut operation is implemented as an overlay intersection between public
187+ lands and county polygons. This yields:
188+ - One output feature for each public-lands portion within a county.
189+ - Public-lands features fully contained by a county remain geometrically
190+ unchanged (aside from possible vertex ordering).
191+ - Public-lands features that cross county boundaries are split into
192+ multiple output features, one per intersected county.
193+
194+ Prior to overlay, a spatial join is used to restrict processing to
195+ intersecting features only.
196+
197+ Args:
198+ public_lands_path: Path to the prepared public lands dataset.
199+ counties_path: Path to the prepared counties dataset (must be in the same
200+ CRS as `public_lands_path`).
201+ out_path: Destination path for the cut output dataset.
202+ out_layer: Layer name for GeoPackage outputs.
203+ county_keep_cols: Optional list of county columns to retain in the output
204+ (in addition to geometry). Columns not present in `counties` are
205+ ignored. If omitted, only county geometry is used.
206+
207+ Returns:
208+ None
209+ """
78210 public_lands = _read (public_lands_path ).reset_index (drop = True )
79211 counties = _read (counties_path ).reset_index (drop = True )
80212
@@ -111,6 +243,21 @@ def cut_public_lands_by_counties(
111243
112244
113245def main ():
246+ """Run the end-to-end cutting workflow using a TaskGraph pipeline.
247+
248+ This function:
249+ - Creates output and intermediate directories.
250+ - Creates a TaskGraph to manage intermediate steps and dependencies.
251+ - Fixes geometries for both layers.
252+ - Reprojects counties to the public lands CRS.
253+ - Simplifies public lands geometries.
254+ - Drops null/zero-area geometries for both layers.
255+ - Cuts public lands by counties via overlay intersection and writes the
256+ final GeoPackage.
257+
258+ Returns:
259+ None
260+ """
114261 out_dir = Path (OUT_DIR )
115262 out_dir .mkdir (parents = True , exist_ok = True )
116263 intermediate_dir = Path (INTERMEDIATE_DIR )
0 commit comments