From 498a0eb1c88bbff232a16710b778da5a7bf3424c Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 29 Jun 2025 12:10:19 +0200 Subject: [PATCH] Speed-up constraints generation Constraints generation was slow because we ran it in a loop and tried to run everything on a single machine - trying to utilize the fact that we only have to build airflow and provider packages once. But those builds are pretty fast compared to constraint generation, and it's much better to parallelize the constraint jobs and run them on separate workers. This will reduce constraint generation delays, which will allow building PROD images and running kubernetes checks faster. --- .github/workflows/ci-amd.yml | 2 + .github/workflows/ci-arm.yml | 4 +- .github/workflows/finalize-tests.yml | 2 +- .github/workflows/generate-constraints.yml | 82 +++++++++++----------- .github/workflows/prod-image-build.yml | 2 +- 5 files changed, 47 insertions(+), 45 deletions(-) diff --git a/.github/workflows/ci-amd.yml b/.github/workflows/ci-amd.yml index 5a55b9369b779..e7573035b1a68 100644 --- a/.github/workflows/ci-amd.yml +++ b/.github/workflows/ci-amd.yml @@ -274,6 +274,8 @@ jobs: runners: ${{ needs.build-info.outputs.amd-runners }} platform: "linux/amd64" python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} + python-versions: ${{ needs.build-info.outputs.python-versions }} + generate-pypi-constraints: "true" # generate no providers constraints only in canary builds - they take quite some time to generate # they are not needed for regular builds, they are only needed to update constraints in canaries generate-no-providers-constraints: ${{ needs.build-info.outputs.canary-run }} diff --git a/.github/workflows/ci-arm.yml b/.github/workflows/ci-arm.yml index bf54881cd1592..21dd5d75205ec 100644 --- a/.github/workflows/ci-arm.yml +++ b/.github/workflows/ci-arm.yml @@ -210,7 +210,6 @@ jobs: contents: read packages: write id-token: write - if: needs.build-info.outputs.canary-run == 
'true' with: runners: ${{ needs.build-info.outputs.arm-runners }} platform: "linux/arm64" @@ -237,13 +236,14 @@ jobs: runners: ${{ needs.build-info.outputs.arm-runners }} platform: "linux/arm64" python-versions-list-as-string: ${{ needs.build-info.outputs.python-versions-list-as-string }} + python-versions: ${{ needs.build-info.outputs.python-versions }} + generate-pypi-constraints: "true" # generate no providers constraints only in canary builds - they take quite some time to generate # they are not needed for regular builds, they are only needed to update constraints in canaries generate-no-providers-constraints: ${{ needs.build-info.outputs.canary-run }} debug-resources: ${{ needs.build-info.outputs.debug-resources }} use-uv: ${{ needs.build-info.outputs.use-uv }} - providers: name: "provider distributions tests" uses: ./.github/workflows/test-providers.yml diff --git a/.github/workflows/finalize-tests.yml b/.github/workflows/finalize-tests.yml index fb4b46d9d6256..819220b42a762 100644 --- a/.github/workflows/finalize-tests.yml +++ b/.github/workflows/finalize-tests.yml @@ -118,7 +118,7 @@ jobs: - name: "Download constraints from the constraints generated by build CI image" uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: - name: constraints + pattern: constraints-* path: ./files - name: "Diff in constraints for Python: ${{ inputs.python-versions-list-as-string }}" run: ./scripts/ci/constraints/ci_diff_constraints.sh diff --git a/.github/workflows/generate-constraints.yml b/.github/workflows/generate-constraints.yml index e6548f3240ed8..565700a66ea6a 100644 --- a/.github/workflows/generate-constraints.yml +++ b/.github/workflows/generate-constraints.yml @@ -32,10 +32,18 @@ on: # yamllint disable-line rule:truthy description: "Stringified array of all Python versions to test - separated by spaces." 
required: true type: string + python-versions: + description: "JSON-formatted array of Python versions to generate constraints for" + required: true + type: string generate-no-providers-constraints: description: "Whether to generate constraints without providers (true/false)" required: true type: string + generate-pypi-constraints: + description: "Whether to generate PyPI constraints (true/false)" + required: true + type: string debug-resources: description: "Whether to run in debug mode (true/false)" required: true @@ -45,19 +53,22 @@ on: # yamllint disable-line rule:truthy required: true type: string jobs: - generate-constraints: + generate-constraints-matrix: permissions: contents: read timeout-minutes: 70 - name: Generate constraints ${{ inputs.python-versions-list-as-string }} + name: Generate constraints for ${{ inputs.python-versions-list-as-string }} runs-on: ${{ fromJSON(inputs.runners) }} + strategy: + matrix: + python-version: ${{ fromJson(inputs.python-versions) }} env: DEBUG_RESOURCES: ${{ inputs.debug-resources }} GITHUB_REPOSITORY: ${{ github.repository }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_USERNAME: ${{ github.actor }} INCLUDE_SUCCESS_OUTPUTS: "true" - PYTHON_VERSIONS: ${{ inputs.python-versions-list-as-string }} + PYTHON_VERSION: ${{ matrix.python-version }} VERBOSE: "true" steps: - name: "Cleanup repo" @@ -67,72 +78,61 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false - - name: "Prepare and cleanup runner" - run: ./scripts/ci/prepare_and_cleanup_runner.sh - shell: bash - - name: "Install Breeze" - uses: ./.github/actions/breeze - with: - use-uv: ${{ inputs.use-uv }} - id: breeze - - name: "Prepare all CI images: ${{ inputs.python-versions-list-as-string}}" - uses: ./.github/actions/prepare_all_ci_images + - name: "Prepare breeze & CI image: ${{ matrix.python-version }}" + uses: ./.github/actions/prepare_breeze_and_image with: platform: ${{ inputs.platform }} - 
python-versions-list-as-string: ${{ inputs.python-versions-list-as-string }} - docker-volume-location: "" # TODO(jscheffl): Understand why it fails here and fix it - - name: "Verify all CI images ${{ inputs.python-versions-list-as-string }}" - run: breeze ci-image verify --run-in-parallel + python: ${{ matrix.python-version }} + use-uv: ${{ inputs.use-uv }} - name: "Source constraints" shell: bash run: > - breeze release-management generate-constraints --run-in-parallel + breeze release-management generate-constraints --airflow-constraints-mode constraints-source-providers --answer yes + --python "${PYTHON_VERSION}" - name: "No providers constraints" shell: bash timeout-minutes: 25 run: > - breeze release-management generate-constraints --run-in-parallel - --airflow-constraints-mode constraints-no-providers --answer yes --parallelism 3 - # The no providers constraints are only needed when we want to update constraints (in canary builds) - # They slow down the start of PROD image builds so we want to only run them when needed. 
+ breeze release-management generate-constraints + --airflow-constraints-mode constraints-no-providers --answer yes + --python "${PYTHON_VERSION}" if: inputs.generate-no-providers-constraints == 'true' - name: "Prepare updated provider distributions" - # In case of provider distributions which are not yet released, we build them from sources shell: bash run: > - breeze release-management prepare-provider-distributions --include-not-ready-providers - --distribution-format wheel + breeze release-management prepare-provider-distributions + --include-not-ready-providers --distribution-format wheel + if: inputs.generate-pypi-constraints == 'true' - name: "Prepare airflow distributions" shell: bash run: > breeze release-management prepare-airflow-distributions --distribution-format wheel + if: inputs.generate-pypi-constraints == 'true' - name: "Prepare task-sdk distribution" shell: bash run: > breeze release-management prepare-task-sdk-distributions --distribution-format wheel + if: inputs.generate-pypi-constraints == 'true' - name: "PyPI constraints" shell: bash timeout-minutes: 25 run: | - for PYTHON in $PYTHON_VERSIONS; do - breeze release-management generate-constraints --airflow-constraints-mode constraints \ - --answer yes --python "${PYTHON}" - done - - name: "Dependency upgrade summary" - shell: bash - env: - PYTHON_VERSIONS: ${{ env.PYTHON_VERSIONS }} - run: | - for PYTHON_VERSION in $PYTHON_VERSIONS; do - echo "Summarizing Python $PYTHON_VERSION" - cat "files/constraints-${PYTHON_VERSION}"/*.md >> $GITHUB_STEP_SUMMARY || true - df -H - done + breeze release-management generate-constraints --airflow-constraints-mode constraints \ + --answer yes --python "${PYTHON_VERSION}" + if: inputs.generate-pypi-constraints == 'true' - name: "Upload constraint artifacts" uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: - name: constraints - path: ./files/constraints-*/constraints-*.txt + name: constraints-${{ matrix.python-version }} + 
path: ./files/constraints-${{ matrix.python-version }}/constraints-*.txt retention-days: 7 if-no-files-found: error + - name: "Dependency upgrade summary" + shell: bash + env: + PYTHON_VERSION: ${{ matrix.python-version }} + run: | + echo "Summarizing Python $PYTHON_VERSION" + cat "files/constraints-${PYTHON_VERSION}"/*.md >> $GITHUB_STEP_SUMMARY || true + df -H diff --git a/.github/workflows/prod-image-build.yml b/.github/workflows/prod-image-build.yml index 2ba47bf8c4990..4819ee7be1470 100644 --- a/.github/workflows/prod-image-build.yml +++ b/.github/workflows/prod-image-build.yml @@ -233,7 +233,7 @@ jobs: - name: "Download constraints" uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: - name: constraints + pattern: constraints-* path: ./docker-context-files - name: "Show constraints" run: |