From 023020612162164fd2e9d73ac548dc9f5c4765ad Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 4 Apr 2026 17:21:41 -0700 Subject: [PATCH 1/4] feat(zipapp): support RULES_PYTHON_EXTRACT_ROOT env var --- python/private/zipapp/py_zipapp_rule.bzl | 5 ++++ python/private/zipapp/zip_main_template.py | 32 ++++++++++++++++------ 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/python/private/zipapp/py_zipapp_rule.bzl b/python/private/zipapp/py_zipapp_rule.bzl index a6ab485fc4..f4bb128582 100644 --- a/python/private/zipapp/py_zipapp_rule.bzl +++ b/python/private/zipapp/py_zipapp_rule.bzl @@ -39,6 +39,11 @@ def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap): "%python_binary_actual%": python_binary_actual_path, "%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path), "%workspace_name%": ctx.workspace_name, + "%EXTRACT_DIR%": paths.join( + (ctx.label.repo_name or "_main"), + ctx.label.package, + ctx.label.name, + ), }, ) return zip_main_py diff --git a/python/private/zipapp/zip_main_template.py b/python/private/zipapp/zip_main_template.py index e997110a5c..a92edb859a 100644 --- a/python/private/zipapp/zip_main_template.py +++ b/python/private/zipapp/zip_main_template.py @@ -22,6 +22,7 @@ del sys.path[0] import os +from os.path import join import shutil import subprocess import tempfile @@ -35,6 +36,10 @@ # executable to use. _PYTHON_BINARY_ACTUAL = "%python_binary_actual%" _WORKSPACE_NAME = "%workspace_name%" +# relative path under EXTRACT_ROOT to extract to. +EXTRACT_DIR = "%EXTRACT_DIR%" + +EXTRACT_ROOT = os.environ.get("RULES_PYTHON_EXTRACT_ROOT") def print_verbose(*args, mapping=None, values=None): @@ -182,11 +187,14 @@ def extract_zip(zip_path, dest_dir): # Create the runfiles tree by extracting the zip file def create_runfiles_root(): - temp_dir = tempfile.mkdtemp("", "Bazel.runfiles_") - extract_zip(os.path.dirname(__file__), temp_dir) + if EXTRACT_ROOT: + extract_root = join(EXTRACT_ROOT, EXTRACT_DIR) + else: + extract_root = tempfile.mkdtemp("", "Bazel.runfiles_") + extract_zip(os.path.dirname(__file__), extract_root) # IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's # important that deletion code be in sync with this directory structure - return os.path.join(temp_dir, "runfiles") + return os.path.join(extract_root, "runfiles") def execute_file( @@ -223,18 +231,24 @@ def execute_file( # - When running in a zip file, we need to clean up the # workspace after the process finishes so control must return here. try: - subprocess_argv = [python_program, main_filename] + args + subprocess_argv = [python_program] + if not EXTRACT_ROOT: + subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={os.path.dirname(runfiles_root)}") + subprocess_argv.append(main_filename) + subprocess_argv += args print_verbose("subprocess argv:", values=subprocess_argv) print_verbose("subprocess env:", mapping=env) print_verbose("subprocess cwd:", workspace) ret_code = subprocess.call(subprocess_argv, env=env, cwd=workspace) sys.exit(ret_code) finally: - # NOTE: dirname() is called because create_runfiles_root() creates a - # sub-directory within a temporary directory, and we want to remove the - # whole temporary directory. - ##shutil.rmtree(os.path.dirname(runfiles_root), True) - pass + if not EXTRACT_ROOT: + # NOTE: dirname() is called because create_runfiles_root() creates a + # sub-directory within a temporary directory, and we want to remove the + # whole temporary directory. + extract_root = os.path.dirname(runfiles_root) + print_verbose("cleanup: rmtree: ", extract_root) + shutil.rmtree(extract_root, True) def main(): From 289bea72d5273ccdf72cf35cb465cecfe063a55a Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 4 Apr 2026 17:25:04 -0700 Subject: [PATCH 2/4] add test --- tests/py_zipapp/BUILD.bazel | 11 +++++++++++ tests/py_zipapp/extract_root_test.sh | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100755 tests/py_zipapp/extract_root_test.sh diff --git a/tests/py_zipapp/BUILD.bazel b/tests/py_zipapp/BUILD.bazel index 708d322f41..616a57d54d 100644 --- a/tests/py_zipapp/BUILD.bazel +++ b/tests/py_zipapp/BUILD.bazel @@ -98,6 +98,17 @@ sh_test( toolchains = ["//python:current_py_toolchain"], ) +sh_test( + name = "extract_root_test", + srcs = ["extract_root_test.sh"], + data = [ + ":system_python_zipapp", + ], + env = { + "ZIPAPP": "$(rootpath :system_python_zipapp)", + }, +) + py_library( name = "some_dep", srcs = ["some_dep.py"], diff --git a/tests/py_zipapp/extract_root_test.sh b/tests/py_zipapp/extract_root_test.sh new file mode 100755 index 0000000000..1aee2b9caa --- /dev/null +++ b/tests/py_zipapp/extract_root_test.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Verify that the RULES_PYTHON_EXTRACT_ROOT env variable is respected. + +set -euo pipefail + +export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test" + +echo "Running zipapp the first time..." +"$ZIPAPP" + +# Verify that the directory was created +if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then + echo "Error: Extract root directory $RULES_PYTHON_EXTRACT_ROOT was not created!" + exit 1 +fi + +# Run a second time to ensure it can re-extract successfully. +echo "Running zipapp the second time..." +"$ZIPAPP" + +echo "Success!" From 342a6238f7afc1d10516627e967cd213dc554c3d Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 4 Apr 2026 17:35:45 -0700 Subject: [PATCH 3/4] cleanup test --- python/private/zipapp/zip_main_template.py | 15 ++++++++++--- tests/py_zipapp/BUILD.bazel | 11 ---------- tests/py_zipapp/extract_root_test.sh | 21 ------------------- ...m_python_zipapp_external_bootstrap_test.sh | 15 +++++++++++++ 4 files changed, 27 insertions(+), 35 deletions(-) delete mode 100755 tests/py_zipapp/extract_root_test.sh diff --git a/python/private/zipapp/zip_main_template.py b/python/private/zipapp/zip_main_template.py index a92edb859a..aa832aa855 100644 --- a/python/private/zipapp/zip_main_template.py +++ b/python/private/zipapp/zip_main_template.py @@ -24,6 +24,7 @@ import os from os.path import join import shutil +import stat import subprocess import tempfile import zipfile @@ -166,10 +167,18 @@ def extract_zip(zip_path, dest_dir): dest_dir = get_windows_path_with_unc_prefix(dest_dir) with zipfile.ZipFile(zip_path) as zf: for info in zf.infolist(): - zf.extract(info, dest_dir) - # UNC-prefixed paths must be absolute/normalized. See - # https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file#maximum-path-length-limitation file_path = os.path.abspath(os.path.join(dest_dir, info.filename)) + # If the file exists, it might be a symlink or read-only file from a previous extraction. + # Unlink it first so zipfile.extract doesn't corrupt the symlink target or fail on read-only files. + if os.path.lexists(file_path) and not os.path.isdir(file_path): + try: + os.unlink(file_path) + except OSError: + # On Windows, unlinking a read-only file fails. + os.chmod(file_path, stat.S_IWRITE) + os.unlink(file_path) + + zf.extract(info, dest_dir) # The Unix st_mode bits (see "man 7 inode") are stored in the upper 16 # bits of external_attr. attrs = info.external_attr >> 16 diff --git a/tests/py_zipapp/BUILD.bazel b/tests/py_zipapp/BUILD.bazel index 616a57d54d..708d322f41 100644 --- a/tests/py_zipapp/BUILD.bazel +++ b/tests/py_zipapp/BUILD.bazel @@ -98,17 +98,6 @@ sh_test( toolchains = ["//python:current_py_toolchain"], ) -sh_test( - name = "extract_root_test", - srcs = ["extract_root_test.sh"], - data = [ - ":system_python_zipapp", - ], - env = { - "ZIPAPP": "$(rootpath :system_python_zipapp)", - }, -) - py_library( name = "some_dep", srcs = ["some_dep.py"], diff --git a/tests/py_zipapp/extract_root_test.sh b/tests/py_zipapp/extract_root_test.sh deleted file mode 100755 index 1aee2b9caa..0000000000 --- a/tests/py_zipapp/extract_root_test.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -# Verify that the RULES_PYTHON_EXTRACT_ROOT env variable is respected. - -set -euo pipefail - -export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test" - -echo "Running zipapp the first time..." -"$ZIPAPP" - -# Verify that the directory was created -if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then - echo "Error: Extract root directory $RULES_PYTHON_EXTRACT_ROOT was not created!" - exit 1 -fi - -# Run a second time to ensure it can re-extract successfully. -echo "Running zipapp the second time..." -"$ZIPAPP" - -echo "Success!" diff --git a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh index 21c6741197..bb4ba640d3 100755 --- a/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh +++ b/tests/py_zipapp/system_python_zipapp_external_bootstrap_test.sh @@ -13,6 +13,21 @@ fi ZIPAPP="${ZIPAPP/.exe/.zip}" export RULES_PYTHON_BOOTSTRAP_VERBOSE=1 + # We're testing the invocation of `__main__.py`, so we have to # manually pass the zipapp to python. +echo "Running zipapp using an automatic temp directory..." +"$PYTHON" "$ZIPAPP" + +echo "Running zipapp with extract root set..." +export RULES_PYTHON_EXTRACT_ROOT="${TEST_TMPDIR:-/tmp}/extract_root_test" +"$PYTHON" "$ZIPAPP" + +# Verify that the directory was created +if [[ ! -d "$RULES_PYTHON_EXTRACT_ROOT" ]]; then + echo "Error: Extract root directory $RULES_PYTHON_EXTRACT_ROOT was not created!" + exit 1 +fi + +echo "Running zipapp with extract root set a second time..." "$PYTHON" "$ZIPAPP" From bc1856568c7a10585b2655225c84ac4480f9dafb Mon Sep 17 00:00:00 2001 From: Richard Levasseur Date: Sat, 4 Apr 2026 17:36:42 -0700 Subject: [PATCH 4/4] cleanup --- python/private/zipapp/py_zipapp_rule.bzl | 8 +++---- python/private/zipapp/zip_main_template.py | 26 +++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/python/private/zipapp/py_zipapp_rule.bzl b/python/private/zipapp/py_zipapp_rule.bzl index f4bb128582..ac7944726f 100644 --- a/python/private/zipapp/py_zipapp_rule.bzl +++ b/python/private/zipapp/py_zipapp_rule.bzl @@ -35,15 +35,15 @@ def _create_zipapp_main_py(ctx, py_runtime, py_executable, stage2_bootstrap): template = py_runtime.zip_main_template, output = zip_main_py, substitutions = { - "%python_binary%": venv_python_exe_path, - "%python_binary_actual%": python_binary_actual_path, - "%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path), - "%workspace_name%": ctx.workspace_name, "%EXTRACT_DIR%": paths.join( (ctx.label.repo_name or "_main"), ctx.label.package, ctx.label.name, ), + "%python_binary%": venv_python_exe_path, + "%python_binary_actual%": python_binary_actual_path, + "%stage2_bootstrap%": runfiles_root_path(ctx, stage2_bootstrap.short_path), + "%workspace_name%": ctx.workspace_name, }, ) return zip_main_py diff --git a/python/private/zipapp/zip_main_template.py b/python/private/zipapp/zip_main_template.py index aa832aa855..3c25d1d722 100644 --- a/python/private/zipapp/zip_main_template.py +++ b/python/private/zipapp/zip_main_template.py @@ -22,12 +22,12 @@ del sys.path[0] import os -from os.path import join import shutil import stat import subprocess import tempfile import zipfile +from os.path import dirname, join # runfiles-root-relative path _STAGE2_BOOTSTRAP = "%stage2_bootstrap%" @@ -124,7 +124,7 @@ def search_path(name): search_path = os.getenv("PATH", os.defpath).split(os.pathsep) for directory in search_path: if directory: - path = os.path.join(directory, name) + path = join(directory, name) if os.path.isfile(path) and os.access(path, os.X_OK): return path return None @@ -145,7 +145,7 @@ def find_binary(runfiles_root, bin_name): # Use normpath() to convert slashes to os.sep on Windows. elif os.sep in os.path.normpath(bin_name): # Case 3: Path is relative to the repo root. - return os.path.join(runfiles_root, bin_name) + return join(runfiles_root, bin_name) else: # Case 4: Path has to be looked up in the search path. return search_path(bin_name) @@ -167,7 +167,7 @@ def extract_zip(zip_path, dest_dir): dest_dir = get_windows_path_with_unc_prefix(dest_dir) with zipfile.ZipFile(zip_path) as zf: for info in zf.infolist(): - file_path = os.path.abspath(os.path.join(dest_dir, info.filename)) + file_path = os.path.abspath(join(dest_dir, info.filename)) # If the file exists, it might be a symlink or read-only file from a previous extraction. # Unlink it first so zipfile.extract doesn't corrupt the symlink target or fail on read-only files. if os.path.lexists(file_path) and not os.path.isdir(file_path): @@ -177,7 +177,7 @@ def extract_zip(zip_path, dest_dir): # On Windows, unlinking a read-only file fails. os.chmod(file_path, stat.S_IWRITE) os.unlink(file_path) - + zf.extract(info, dest_dir) # The Unix st_mode bits (see "man 7 inode") are stored in the upper 16 # bits of external_attr. @@ -200,10 +200,10 @@ def create_runfiles_root(): extract_root = join(EXTRACT_ROOT, EXTRACT_DIR) else: extract_root = tempfile.mkdtemp("", "Bazel.runfiles_") - extract_zip(os.path.dirname(__file__), extract_root) + extract_zip(dirname(__file__), extract_root) # IMPORTANT: Later code does `rm -fr` on dirname(runfiles_root) -- it's # important that deletion code be in sync with this directory structure - return os.path.join(extract_root, "runfiles") + return join(extract_root, "runfiles") def execute_file( @@ -242,7 +242,7 @@ def execute_file( try: subprocess_argv = [python_program] if not EXTRACT_ROOT: - subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={os.path.dirname(runfiles_root)}") + subprocess_argv.append(f"-XRULES_PYTHON_ZIP_DIR={dirname(runfiles_root)}") subprocess_argv.append(main_filename) subprocess_argv += args print_verbose("subprocess argv:", values=subprocess_argv) @@ -255,7 +255,7 @@ def execute_file( # NOTE: dirname() is called because create_runfiles_root() creates a # sub-directory within a temporary directory, and we want to remove the # whole temporary directory. - extract_root = os.path.dirname(runfiles_root) + extract_root = dirname(runfiles_root) print_verbose("cleanup: rmtree: ", extract_root) shutil.rmtree(extract_root, True) @@ -289,7 +289,7 @@ def main(): # See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH new_env["PYTHONSAFEPATH"] = "1" - main_filename = os.path.join(runfiles_root, main_rel_path) + main_filename = join(runfiles_root, main_rel_path) main_filename = get_windows_path_with_unc_prefix(main_filename) assert os.path.exists(main_filename), ( "Cannot exec() %r: file not found." % main_filename @@ -299,7 +299,7 @@ def main(): ) if _PYTHON_BINARY_VENV: - python_program = os.path.join(runfiles_root, _PYTHON_BINARY_VENV) + python_program = join(runfiles_root, _PYTHON_BINARY_VENV) # When a venv is used, the `bin/python3` symlink may need to be created. # This case occurs when "create venv at runtime" or "resolve python at # runtime" modes are enabled. @@ -311,7 +311,7 @@ def main(): "Program's venv binary not under runfiles: {python_program}" ) symlink_to = find_binary(runfiles_root, _PYTHON_BINARY_ACTUAL) - os.makedirs(os.path.dirname(python_program), exist_ok=True) + os.makedirs(dirname(python_program), exist_ok=True) try: os.symlink(symlink_to, python_program) except OSError as e: @@ -340,7 +340,7 @@ def main(): # change directory to the right runfiles directory. # (So that the data files are accessible) if os.environ.get("RUN_UNDER_RUNFILES") == "1": - workspace = os.path.join(runfiles_root, _WORKSPACE_NAME) + workspace = join(runfiles_root, _WORKSPACE_NAME) sys.stdout.flush() execute_file(