From 515726aeed0a8e2b3e03ca42a4dc518a69e4c1a3 Mon Sep 17 00:00:00 2001 From: Deepak Kumar Date: Mon, 25 May 2026 10:35:06 -0700 Subject: [PATCH 1/3] UI: Return clear 500 detail from structure_data when asset_expression is malformed The /structure/structure_data endpoint calls get_upstream_assets() to walk the serialized Dag's asset_expression. If the stored expression contains an unknown key or asset type, get_upstream_assets() raises TypeError("Unsupported type: ..."). The exception escaped uncaught and FastAPI returned a generic {"detail": "Internal Server Error"} body with no context about which Dag triggered it, forcing operators to dig through server logs to identify the broken Dag. Wrap the call in try/except TypeError and re-raise as HTTPException(500) with a detail message identifying the Dag id and version. Still a 500 (the underlying data corruption is genuinely server-side, not bad client input), but now with a controlled, debuggable response body. Regression test mocks get_upstream_assets to raise TypeError and asserts the response is 500 with a detail message that includes the Dag id. --- .../core_api/routes/ui/structure.py | 12 +++++++--- .../core_api/routes/ui/test_structure.py | 23 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py index 87184788413f3..8759e607f659f 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py @@ -161,9 +161,15 @@ def structure_data( ) if (asset_expression := serialized_dag.dag_model.asset_expression) and entry_node_ref: - upstream_asset_nodes, upstream_asset_edges = get_upstream_assets( - asset_expression, entry_node_ref["id"] - ) + try: + upstream_asset_nodes, upstream_asset_edges = get_upstream_assets( + asset_expression, entry_node_ref["id"] + ) + except TypeError as e: + raise HTTPException( + status.HTTP_500_INTERNAL_SERVER_ERROR, + f"Malformed asset_expression in Dag {dag_id!r} version {version_number}: {e}", + ) from e data["nodes"] += upstream_asset_nodes data["edges"] += upstream_asset_edges diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py index db436ca3cf93a..0fedb8ffa05a5 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py @@ -709,6 +709,29 @@ def test_should_return_404(self, test_client): assert response.status_code == 404 assert response.json()["detail"] == "Dag with id not_existing was not found" + @pytest.mark.usefixtures("make_dags") + def test_should_return_500_on_malformed_asset_expression(self, test_client): + """A TypeError from get_upstream_assets surfaces as a 500 with a clear message. + + Without the try/except wrap, the TypeError propagates uncaught and FastAPI returns a + generic ``{"detail": "Internal Server Error"}`` body with no context about which Dag + triggered it. With the wrap, the response body identifies the Dag and version, which + is what an operator needs to start debugging stored-data corruption. + """ + with mock.patch( + "airflow.api_fastapi.core_api.routes.ui.structure.get_upstream_assets", + side_effect=TypeError("Unsupported type: dict_keys(['weird-op'])"), + ): + response = test_client.get( + "/structure/structure_data", + params={"dag_id": DAG_ID, "external_dependencies": True}, + ) + assert response.status_code == 500 + detail = response.json()["detail"] + assert "Malformed asset_expression" in detail + assert DAG_ID in detail + assert "Unsupported type" in detail + def test_should_return_404_when_dag_version_not_found(self, test_client): response = test_client.get( "/structure/structure_data", params={"dag_id": DAG_ID, "version_number": 999} From 56ff1f7f49657f2743926256e48eae9892ef352a Mon Sep 17 00:00:00 2001 From: Deepak Kumar Date: Tue, 26 May 2026 08:41:19 -0700 Subject: [PATCH 2/3] Use 400 BAD_REQUEST for malformed asset_expression per review feedback Per @jason810496 review feedback on #67489: the malformed asset_expression ultimately originates from user-authored Dag code (via the Task SDK), so the appropriate response is 400 BAD_REQUEST rather than 500 INTERNAL_SERVER_ERROR. - Change status code from 500 to 400 in structure_data. - Add HTTP_400_BAD_REQUEST to create_openapi_http_exception_doc so the OpenAPI spec advertises the new error response. - Update regression test to assert 400 and rename accordingly. Detail message is unchanged per reviewer: "It's fine to add more context". --- .../api_fastapi/core_api/openapi/_private_ui.yaml | 6 ++++++ .../api_fastapi/core_api/routes/ui/structure.py | 9 +++++++-- .../core_api/routes/ui/test_structure.py | 14 ++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml b/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml index 080f9c4ded360..215652c5ddc64 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml +++ b/airflow-core/src/airflow/api_fastapi/core_api/openapi/_private_ui.yaml @@ -991,6 +991,12 @@ paths: application/json: schema: $ref: '#/components/schemas/StructureDataResponse' + '400': + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPExceptionResponse' + description: Bad Request '404': content: application/json: diff --git a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py index 8759e607f659f..597f44db424bb 100644 --- a/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py +++ b/airflow-core/src/airflow/api_fastapi/core_api/routes/ui/structure.py @@ -42,7 +42,12 @@ @structure_router.get( "/structure_data", - responses=create_openapi_http_exception_doc([status.HTTP_404_NOT_FOUND]), + responses=create_openapi_http_exception_doc( + [ + status.HTTP_400_BAD_REQUEST, + status.HTTP_404_NOT_FOUND, + ] + ), dependencies=[ Depends(requires_access_dag("GET")), Depends(requires_access_dag("GET", DagAccessEntity.DEPENDENCIES)), @@ -167,7 +172,7 @@ def structure_data( ) except TypeError as e: raise HTTPException( - status.HTTP_500_INTERNAL_SERVER_ERROR, + status.HTTP_400_BAD_REQUEST, f"Malformed asset_expression in Dag {dag_id!r} version {version_number}: {e}", ) from e data["nodes"] += upstream_asset_nodes diff --git a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py index 0fedb8ffa05a5..8e504c132c4eb 100644 --- a/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py +++ b/airflow-core/tests/unit/api_fastapi/core_api/routes/ui/test_structure.py @@ -710,13 +710,15 @@ def test_should_return_404(self, test_client): assert response.json()["detail"] == "Dag with id not_existing was not found" @pytest.mark.usefixtures("make_dags") - def test_should_return_500_on_malformed_asset_expression(self, test_client): - """A TypeError from get_upstream_assets surfaces as a 500 with a clear message. + def test_should_return_400_on_malformed_asset_expression(self, test_client): + """A TypeError from get_upstream_assets surfaces as a 400 with a clear message. + The asset_expression ultimately comes from user-authored Dag code (via the Task SDK), + so a malformed expression is bad input that ended up persisted -- not a server fault. Without the try/except wrap, the TypeError propagates uncaught and FastAPI returns a - generic ``{"detail": "Internal Server Error"}`` body with no context about which Dag - triggered it. With the wrap, the response body identifies the Dag and version, which - is what an operator needs to start debugging stored-data corruption. + generic ``{"detail": "Internal Server Error"}`` 500 body with no context about which + Dag triggered it. With the wrap, the response identifies the Dag and version, which + is what a caller needs to fix the upstream Dag definition. """ with mock.patch( "airflow.api_fastapi.core_api.routes.ui.structure.get_upstream_assets", @@ -726,7 +728,7 @@ def test_should_return_500_on_malformed_asset_expression(self, test_client): "/structure/structure_data", params={"dag_id": DAG_ID, "external_dependencies": True}, ) - assert response.status_code == 500 + assert response.status_code == 400 detail = response.json()["detail"] assert "Malformed asset_expression" in detail assert DAG_ID in detail From 6174d6e575d97d8dc0e9cade760eaaf8c71d8ffa Mon Sep 17 00:00:00 2001 From: pierrejeambrun Date: Mon, 1 Jun 2026 15:03:31 +0200 Subject: [PATCH 3/3] Revert uv.lock diff --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index b421b2da82370..caf92c6b934d7 100644 --- a/uv.lock +++ b/uv.lock @@ -107,7 +107,7 @@ apache-airflow-providers-zendesk = false apache-airflow-providers-presto = false apache-airflow-providers-airbyte = false apache-airflow-providers-apache-hive = false -swagger-plugin-for-sphinx = { timestamp = "0001-01-01T00:00:00Z", span = "PT8H" } +swagger-plugin-for-sphinx = { timestamp = "0001-01-01T00:00:00Z", span = "PT2H" } apache-airflow-kubernetes-tests = false apache-airflow-providers-grpc = false apache-airflow-providers-apache-druid = false @@ -21167,8 +21167,8 @@ name = "secretstorage" version = "3.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cryptography", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version < '3.15' and sys_platform == 'emscripten') or (python_full_version < '3.15' and sys_platform == 'win32') or (platform_machine != 'arm64' and sys_platform == 'darwin') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "jeepney", marker = "(python_full_version >= '3.14' and sys_platform == 'darwin') or (python_full_version < '3.15' and sys_platform == 'emscripten') or (python_full_version < '3.15' and sys_platform == 'win32') or (platform_machine != 'arm64' and sys_platform == 'darwin') or (sys_platform != 'darwin' and sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "cryptography", marker = "python_full_version >= '3.14' or platform_machine != 'arm64' or sys_platform != 'darwin'" }, + { name = "jeepney", marker = "python_full_version >= '3.14' or platform_machine != 'arm64' or sys_platform != 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } wheels = [