From a5a850094954c4bbc0085b543e63b89c4b468394 Mon Sep 17 00:00:00 2001 From: jross Date: Thu, 25 Jun 2026 11:53:54 -0600 Subject: [PATCH] Use GCS-backed IO manager for durable asset I/O Materializing a downstream asset on its own (combine or geoserver) failed: FileNotFoundError: .../storage/<...>/sources/<...> DagsterExecutionLoadInputError: loading input "src_bor" of "<...>" Dagster+ serverless defaults to the filesystem IO manager backed by the run's ephemeral /tmp, so a source asset's output isn't available to a combine/geoserver step unless both run in the same run. Configure a GCSPickleIOManager (gcs_prefix=dagster-io on the products bucket) so asset outputs persist and downstream assets load their inputs across runs. Co-Authored-By: Claude Opus 4.8 --- orchestration/definitions.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/orchestration/definitions.py b/orchestration/definitions.py index fbe2b4d..5cc5233 100644 --- a/orchestration/definitions.py +++ b/orchestration/definitions.py @@ -2,6 +2,7 @@ import dagster as dg import yaml +from dagster_gcp.gcs import GCSPickleIOManager, GCSResource as DagsterGCSResource from orchestration.resources.die_config import DIEConfigResource from orchestration.resources.gcs import GCSResource @@ -59,5 +60,14 @@ def _build_schedules(products_config: dict) -> list: bucket_name=_products_config.get("gcs_bucket", "dataservices-die-products"), ), "geoserver": GeoServerResource(), + # Persist asset I/O to GCS instead of the serverless run's ephemeral + # /tmp. Without this, materializing a downstream asset (combine / + # geoserver) on its own can't load its source inputs from a prior run + # and fails with FileNotFoundError. + "io_manager": GCSPickleIOManager( + gcs=DagsterGCSResource(), + gcs_bucket=_products_config.get("gcs_bucket", "dataservices-die-products"), + gcs_prefix="dagster-io", + ), }, )