From c55426b351ab8a18f5b77f881356b1924a6ff1d7 Mon Sep 17 00:00:00 2001 From: Kaxil Naik Date: Tue, 10 Mar 2026 18:34:42 +0000 Subject: [PATCH] Enable parallel backfill by eliminating shared state between providers Add --provider and --providers-json flags to extract_parameters.py and extract_connections.py so each backfill run uses an isolated temp providers.json and only scans the target provider. In --provider mode, modules.json is not written (it would be incomplete), so concurrent runs don't clobber each other. The backfill command now creates a TemporaryDirectory with per-version providers.json files instead of patching a shared file. --- dev/breeze/doc/11_registry_tasks.rst | 11 ++ dev/breeze/doc/images/output_registry.svg | 18 ++- dev/breeze/doc/images/output_registry.txt | 2 +- .../doc/images/output_registry_backfill.svg | 38 +++-- .../doc/images/output_registry_backfill.txt | 2 +- .../commands/registry_commands.py | 136 ++++++++-------- dev/breeze/tests/test_registry_backfill.py | 145 ++++++++++-------- dev/registry/extract_connections.py | 34 +++- dev/registry/extract_parameters.py | 93 +++++++---- 9 files changed, 288 insertions(+), 191 deletions(-) diff --git a/dev/breeze/doc/11_registry_tasks.rst b/dev/breeze/doc/11_registry_tasks.rst index 6b01d9065dcd6..9be90b576b3ae 100644 --- a/dev/breeze/doc/11_registry_tasks.rst +++ b/dev/breeze/doc/11_registry_tasks.rst @@ -79,6 +79,17 @@ Example usage: # Backfill a hyphenated provider breeze registry backfill --provider microsoft-azure --version 11.0.0 +Each run uses an isolated temporary ``providers.json``, so different providers +can be backfilled in parallel from separate terminal sessions: + +.. code-block:: bash + + # Terminal 1 + breeze registry backfill --provider amazon --version 9.15.0 --version 9.14.0 + + # Terminal 2 (safe to run simultaneously) + breeze registry backfill --provider google --version 14.0.0 --version 13.0.0 + Output is written to ``registry/src/_data/versions/{provider}/{version}/``: - ``parameters.json`` — operator/sensor/hook parameters diff --git a/dev/breeze/doc/images/output_registry.svg b/dev/breeze/doc/images/output_registry.svg index e4b4f92c4f861..951851010e777 100644 --- a/dev/breeze/doc/images/output_registry.svg +++ b/dev/breeze/doc/images/output_registry.svg @@ -1,4 +1,4 @@ - +