From e39c721c593657f8dfb2b0cb406e9533f1381769 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 15 Mar 2026 14:32:45 +0100 Subject: [PATCH] [v3-1-test] Warn instead of failing on missing 3rd-party doc inventories (#63630) * Warn instead of failing on missing 3rd-party doc inventories Third-party Sphinx intersphinx inventories (e.g., Pandas) are sometimes temporarily unavailable. Previously, any download failure terminated the entire doc build. Now missing 3rd-party inventories produce warnings and fall back to cached versions when available. A marker file is written for CI to detect missing inventories and send Slack notifications on canary builds. Publishing workflows fail by default but can opt out. - Add --fail-on-missing-third-party-inventories flag (default: off) - Add --clean-inventory-cache flag (--clean-build no longer deletes cache) - Cache inventories via stash action in CI and publish workflows - Send Slack warning on canary builds when inventories are missing * Add documentation for inventory cache handling options Document the new --clean-inventory-cache, --fail-on-missing-third-party-inventories, and --ignore-missing-inventories flags in the contributing docs, Breeze developer tasks, and release management docs. * Skip missing third-party inventories in intersphinx mapping When a third-party inventory file doesn't exist in the cache, skip it from the Sphinx intersphinx_mapping instead of referencing a non-existent file. This prevents Sphinx build errors when third-party inventory downloads fail. 
(cherry picked from commit afda438816b8e8cd43ef105630c8f33da8ec98b5) Co-authored-by: Jarek Potiuk --- .github/workflows/ci-amd-arm.yml | 1 + .github/workflows/ci-image-checks.yml | 40 ++++++++- .../11_documentation_building.rst | 25 +++++- dev/breeze/doc/03_developer_tasks.rst | 13 +++ .../doc/09_release_management_tasks.rst | 4 + dev/breeze/doc/images/output_build-docs.svg | 86 ++++++++++++------- dev/breeze/doc/images/output_build-docs.txt | 2 +- .../output_workflow-run_publish-docs.svg | 22 +++-- .../output_workflow-run_publish-docs.txt | 2 +- .../commands/developer_commands.py | 26 +++++- .../commands/developer_commands_config.py | 7 +- .../commands/workflow_commands.py | 7 ++ .../commands/workflow_commands_config.py | 6 ++ .../airflow_breeze/params/doc_build_params.py | 6 ++ devel-common/src/docs/build_docs.py | 32 +++++-- devel-common/src/docs/utils/conf_constants.py | 31 ++++--- .../docs_build/fetch_inventories.py | 47 +++++++--- 17 files changed, 280 insertions(+), 77 deletions(-) diff --git a/.github/workflows/ci-amd-arm.yml b/.github/workflows/ci-amd-arm.yml index 99dbeaf15320a..0faa6f16b102f 100644 --- a/.github/workflows/ci-amd-arm.yml +++ b/.github/workflows/ci-amd-arm.yml @@ -331,6 +331,7 @@ jobs: secrets: DOCS_AWS_ACCESS_KEY_ID: ${{ secrets.DOCS_AWS_ACCESS_KEY_ID }} DOCS_AWS_SECRET_ACCESS_KEY: ${{ secrets.DOCS_AWS_SECRET_ACCESS_KEY }} + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} providers: name: "provider distributions tests" diff --git a/.github/workflows/ci-image-checks.yml b/.github/workflows/ci-image-checks.yml index 7d23dce54a7d9..1e6c70e99f53c 100644 --- a/.github/workflows/ci-image-checks.yml +++ b/.github/workflows/ci-image-checks.yml @@ -117,6 +117,8 @@ on: # yamllint disable-line rule:truthy required: true DOCS_AWS_SECRET_ACCESS_KEY: required: true + SLACK_BOT_TOKEN: + required: false permissions: @@ -252,23 +254,55 @@ jobs: uses: apache/infrastructure-actions/stash/restore@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468 with: path: 
./generated/_inventory_cache/ - key: cache-docs-inventory-v1-${{ hashFiles('**/pyproject.toml') }} + key: cache-docs-inventory-v1 id: restore-docs-inventory-cache - name: "Building docs with ${{ matrix.flag }} flag" env: DOCS_LIST_AS_STRING: ${{ inputs.docs-list-as-string }} run: > breeze build-docs ${DOCS_LIST_AS_STRING} ${{ matrix.flag }} --refresh-airflow-inventories + - name: "Check for missing third-party inventories" + id: check-missing-inventories + if: always() + shell: bash + run: | + MARKER_FILE="./generated/_inventory_cache/.missing_third_party_inventories" + if [[ -f "${MARKER_FILE}" ]]; then + echo "missing=true" >> "${GITHUB_OUTPUT}" + echo "::warning::Missing third-party inventories:" + cat "${MARKER_FILE}" + echo "packages<<EOF" >> "${GITHUB_OUTPUT}" + cat "${MARKER_FILE}" >> "${GITHUB_OUTPUT}" + echo "EOF" >> "${GITHUB_OUTPUT}" + else + echo "missing=false" >> "${GITHUB_OUTPUT}" + fi + - name: "Notify Slack about missing inventories (canary only)" + if: >- + inputs.canary-run == 'true' && + steps.check-missing-inventories.outputs.missing == 'true' && + matrix.flag == '--docs-only' + uses: slackapi/slack-github-action@91efab103c0de0a537f72a35f6b8cda0ee76bf0a # v2.1.1 + with: + method: chat.postMessage + token: ${{ env.SLACK_BOT_TOKEN }} + # yamllint disable rule:line-length + payload: | + channel: "internal-airflow-ci-cd" + text: "⚠️ Missing 3rd-party doc inventories in canary build on *${{ github.ref_name }}*\n\nPackages:\n${{ steps.check-missing-inventories.outputs.packages }}\n\n" + # yamllint enable rule:line-length + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - name: "Save docs inventory cache" uses: apache/infrastructure-actions/stash/save@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468 with: path: ./generated/_inventory_cache/ - key: cache-docs-inventory-v1-${{ hashFiles('**/pyproject.toml') }} + key: cache-docs-inventory-v1 if-no-files-found: 'error' retention-days: '2' # If we upload from multiple matrix jobs we could end up with a race 
condition. so just pick one job # to be responsible for updating it. https://github.com/actions/upload-artifact/issues/506 - if: steps.restore-docs-inventory-cache != 'true' && matrix.flag == '--docs-only' + if: steps.restore-docs-inventory-cache.outputs.stash-hit != 'true' && matrix.flag == '--docs-only' - name: "Upload build docs" uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: diff --git a/contributing-docs/11_documentation_building.rst b/contributing-docs/11_documentation_building.rst index b7dfe3abf5215..7ae771fd212af 100644 --- a/contributing-docs/11_documentation_building.rst +++ b/contributing-docs/11_documentation_building.rst @@ -239,7 +239,30 @@ For example: breeze build-docs --doc-only --clean fab -Will build ``fab`` provider documentation and clean inventories and other build artifacts before. +Will build ``fab`` provider documentation and clean build artifacts before. + +Inventory cache handling +........................ + +When building documentation, Sphinx downloads intersphinx inventories from external sources (both Airflow +packages hosted on S3 and third-party packages like Pandas, SQLAlchemy, etc.). These inventories enable +cross-references between documentation sets. + +By default, missing third-party inventories produce warnings but do **not** fail the build. This is +because third-party inventory servers can be temporarily unavailable and should not block documentation +builds. If a cached version of the inventory exists, it will be used instead. + +The following flags control inventory behavior: + +- ``--clean-inventory-cache`` — deletes the inventory cache before fetching. Use this when you want + to force a completely fresh download of all inventories. +- ``--clean-build`` — cleans build artifacts (``_build``, ``_doctrees``, ``apis``) but does **not** + delete the inventory cache. This allows rebuilding docs from scratch while preserving cached + inventories. 
+- ``--refresh-airflow-inventories`` — forces a refresh of only Airflow package inventories, without + cleaning build artifacts or external inventories. +- ``--fail-on-missing-third-party-inventories`` — fails the build if any third-party inventory cannot + be downloaded (useful for publishing workflows where complete cross-references are important). You can also use ``breeze build-docs --help`` to see available options and head to `breeze documentation <../dev/breeze/doc/03_developer_tasks.rst>`__ to learn more about the ``breeze`` diff --git a/dev/breeze/doc/03_developer_tasks.rst b/dev/breeze/doc/03_developer_tasks.rst index b61aa6a17da04..0c5190018033b 100644 --- a/dev/breeze/doc/03_developer_tasks.rst +++ b/dev/breeze/doc/03_developer_tasks.rst @@ -278,6 +278,19 @@ package names and can be used to select more than one package with single filter breeze build-docs --package-filter apache-airflow-providers-* +Inventory cache handling +^^^^^^^^^^^^^^^^^^^^^^^^ + +When building documentation, Sphinx downloads intersphinx inventories to enable cross-references +between documentation sets. By default, missing third-party inventories (e.g., Pandas, SQLAlchemy) +produce warnings but do **not** fail the build — third-party servers can be temporarily unavailable. +If a cached version exists, it will be used with a warning. + +Use ``--clean-inventory-cache`` to force a fresh download of all inventories, or +``--fail-on-missing-third-party-inventories`` to fail the build when any third-party inventory +is missing (useful for publishing). Note that ``--clean-build`` cleans build artifacts but +preserves the inventory cache. + Often errors during documentation generation come from the docstrings of auto-api generated classes. During the docs building auto-api generated files are stored in the ``generated`` folder. This helps you easily identify the location the problems with documentation originated from. 
diff --git a/dev/breeze/doc/09_release_management_tasks.rst b/dev/breeze/doc/09_release_management_tasks.rst index 3d3108c6df261..372c9978f3e91 100644 --- a/dev/breeze/doc/09_release_management_tasks.rst +++ b/dev/breeze/doc/09_release_management_tasks.rst @@ -933,6 +933,10 @@ These are all available flags of ``workflow-run`` command: ``--site-env`` specifies the environment to use for the site (e.g., auto, live, staging). the default is auto, based on the ref it decides live or staging. ``--refresh-site`` specifies whether to refresh the site after publishing the documentation. This triggers workflow on apache/airflow-site repository to refresh the site. ``--skip-write-to-stable-folder`` specifies the documentation packages to skip writing to the stable folder. +``--ignore-missing-inventories`` when set, the publish workflow will not fail if third-party intersphinx +inventories cannot be downloaded. By default, the publish workflow fails on missing inventories to ensure +complete cross-references in published documentation. Use this flag only when you need to publish despite +temporary third-party inventory outages. These are all available flags of ``workflow-run publish-docs`` command: diff --git a/dev/breeze/doc/images/output_build-docs.svg b/dev/breeze/doc/images/output_build-docs.svg index ee242c749039a..cd1ec59a584aa 100644 --- a/dev/breeze/doc/images/output_build-docs.svg +++ b/dev/breeze/doc/images/output_build-docs.svg @@ -1,4 +1,4 @@ - +