From a07abeae47293d3610a642517ad35710f0687cc1 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 10:50:59 -0700 Subject: [PATCH 1/8] feat: add firejail wrapper --- .../source_declarative_manifest/__init__.py | 2 + .../_sandboxed_run.py | 52 +++++++++++++++++++ pyproject.toml | 1 + 3 files changed, 55 insertions(+) create mode 100644 airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py diff --git a/airbyte_cdk/cli/source_declarative_manifest/__init__.py b/airbyte_cdk/cli/source_declarative_manifest/__init__.py index 0ea86fa7b..eb3ff1587 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/__init__.py +++ b/airbyte_cdk/cli/source_declarative_manifest/__init__.py @@ -1,5 +1,7 @@ from airbyte_cdk.cli.source_declarative_manifest._run import run +from airbyte_cdk.cli.source_declarative_manifest._sandboxed_run import sandboxed_run __all__ = [ "run", + "sandboxed_run", ] diff --git a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py new file mode 100644 index 000000000..7e1435cf1 --- /dev/null +++ b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py @@ -0,0 +1,52 @@ +"""Use Firejail (if installed) to run the source in a sandboxed environment.""" + +import os +import subprocess +import sys + +ENV_SANDBOX_MODE = "AIRBYTE_CONNECTOR_SANDBOX_MODE" +ENV_SANDBOX_MODE_FIREJAIL = "FIREJAIL" +ENV_SANDBOX_MODE_AUTO = "AUTO" # Use Firejail if available, otherwise None +ENV_SANDBOX_MODE_NONE = "NONE" + + +def _wrap_in_sandbox(cmd: list[str]) -> list[str]: + """Wrap the given command in Firejail. + This function modifies the command to include Firejail options + and returns the updated command list. + """ + sb_mode = os.getenv(ENV_SANDBOX_MODE, ENV_SANDBOX_MODE_AUTO).upper() + if sb_mode == ENV_SANDBOX_MODE_NONE: + print( + f"WARNING: Sandboxing disabled due to env variable '{ENV_SANDBOX_MODE}'. " + "Running without Firejail." 
+ ) + return cmd + + # Try Firejail + try: + subprocess.run(["firejail", "--version"], check=True, stdout=subprocess.PIPE) + except FileNotFoundError: + if sb_mode == ENV_SANDBOX_MODE_FIREJAIL: + raise RuntimeError( + "Firejail not found. Set AIRBYTE_CONNECTOR_SANDBOX_MODE=NONE to disable sandboxing." + ) + print("Firejail not found. Running without sandboxing.") + return cmd + + # Firejail is available + return ["firejail", "--private", "--net=none"] + cmd + + +def sandboxed_run(): + executable = "source-declarative-manifest" # Assume base SDM entrypoint is installed and on PATH + + full_cmd = _wrap_in_sandbox([executable] + sys.argv[1:]) # Preserve CLI arguments and wrap in Firejail + print(f"Running command: {' '.join(full_cmd)}") + + # Execute the wrapped SDM command + os.execvp(full_cmd[0], full_cmd) + + +if __name__ == "__main__": + sandboxed_run() diff --git a/pyproject.toml b/pyproject.toml index 7f8201692..25e7ac091 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ sql = ["sqlalchemy"] [tool.poetry.scripts] source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" +sandboxed-source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:sandboxed_run" [tool.isort] skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 From 8fd0c208fee3a0c95795c5a6a02db4a09afe8fb9 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 10:51:08 -0700 Subject: [PATCH 2/8] feat: install firejail in docker image --- Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Dockerfile b/Dockerfile index d2bce875d..7da20db6e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,12 @@ FROM docker.io/airbyte/python-connector-base:4.0.0@sha256:d9894b6895923b379f3006 WORKDIR /airbyte/integration_code +# Install Firejail +RUN apt-get update && \ + apt-get install -y firejail && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + # Copy project files 
needed for build COPY pyproject.toml poetry.lock README.md ./ COPY dist/*.whl ./dist/ From f6788e98f921e3c571721737a6e835ae0c4a32ea Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 12:06:46 -0700 Subject: [PATCH 3/8] add logging for clarity --- airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py index 7e1435cf1..03e607ae5 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py @@ -25,6 +25,7 @@ def _wrap_in_sandbox(cmd: list[str]) -> list[str]: # Try Firejail try: + print("Checking for Firejail...") subprocess.run(["firejail", "--version"], check=True, stdout=subprocess.PIPE) except FileNotFoundError: if sb_mode == ENV_SANDBOX_MODE_FIREJAIL: @@ -35,6 +36,7 @@ def _wrap_in_sandbox(cmd: list[str]) -> list[str]: return cmd # Firejail is available + print("Firejail found. Running with Firejail sandboxing.") return ["firejail", "--private", "--net=none"] + cmd From 5074b4bf8b7c1ad1bd0ad9643112b423728bb760 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 12:17:23 -0700 Subject: [PATCH 4/8] print usage info for CLI --- .../_sandboxed_run.py | 46 +++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py index 03e607ae5..724c8f4fd 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py @@ -9,6 +9,21 @@ ENV_SANDBOX_MODE_AUTO = "AUTO" # Use Firejail if available, otherwise None ENV_SANDBOX_MODE_NONE = "NONE" +USAGE = f""" +Sandboxed execution of the source-declarative-manifest connector. 
+ +Usage: source-declarative-manifest-sandboxed [OPTIONS] [CMD] + +Options: + --help Show this help message and exit. + --check-sandbox Check Firejail availability and exit. + +CMD: + The command to run in the sandboxed environment. This should be the command + that would normally be run to start the connector. E.g. "check", "read", etc. + + The command is ignored if specifying --check-sandbox or --help. +""" def _wrap_in_sandbox(cmd: list[str]) -> list[str]: """Wrap the given command in Firejail. @@ -18,19 +33,20 @@ def _wrap_in_sandbox(cmd: list[str]) -> list[str]: sb_mode = os.getenv(ENV_SANDBOX_MODE, ENV_SANDBOX_MODE_AUTO).upper() if sb_mode == ENV_SANDBOX_MODE_NONE: print( - f"WARNING: Sandboxing disabled due to env variable '{ENV_SANDBOX_MODE}'. " + f"WARNING: Sandboxing disabled because env var '{ENV_SANDBOX_MODE}={sb_mode}'. " "Running without Firejail." ) return cmd # Try Firejail try: - print("Checking for Firejail...") subprocess.run(["firejail", "--version"], check=True, stdout=subprocess.PIPE) except FileNotFoundError: if sb_mode == ENV_SANDBOX_MODE_FIREJAIL: raise RuntimeError( - "Firejail not found. Set AIRBYTE_CONNECTOR_SANDBOX_MODE=NONE to disable sandboxing." + f"Firejail required by env var value `{ENV_SANDBOX_MODE}={sb_mode}` but not found. " + f"Set {ENV_SANDBOX_MODE} to 'NONE' to disable sandboxing or 'AUTO' to " + "only use Firejail when available." ) print("Firejail not found. 
Running without sandboxing.") return cmd @@ -41,6 +57,18 @@ def _wrap_in_sandbox(cmd: list[str]) -> list[str]: def sandboxed_run(): + if len(sys.argv) == 1: + _print_help() + return + + if sys.argv[1] == "--help": + _print_help() + return + + if sys.argv[1] == "--check-sandbox": + _print_sandbox_check() + return + executable = "source-declarative-manifest" # Assume base SDM entrypoint is installed and on PATH full_cmd = _wrap_in_sandbox([executable] + sys.argv[1:]) # Preserve CLI arguments and wrap in Firejail @@ -50,5 +78,17 @@ def sandboxed_run(): os.execvp(full_cmd[0], full_cmd) +def _print_help() -> None: + print(USAGE) + + +def _print_sandbox_check() -> None: + try: + subprocess.run(["firejail", "--version"], check=True, stdout=subprocess.PIPE) + print("Firejail found.") + except FileNotFoundError: + print("Firejail not found.") + + if __name__ == "__main__": sandboxed_run() From 528e5fa54e1c35b1d2bffffea7b8931156038d02 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 12:18:12 -0700 Subject: [PATCH 5/8] use sandboxed entrypoint by default --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7da20db6e..20156e092 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,7 +21,7 @@ COPY dist/*.whl ./dist/ # Install dependencies - ignore keyring warnings RUN poetry config virtualenvs.create false \ - && poetry install --only main --no-interaction --no-ansi || true + && poetry install --only main --no-interaction --no-ansi # Build and install the package RUN pip install dist/*.whl @@ -40,6 +40,6 @@ RUN rm -rf dist/ pyproject.toml poetry.lock README.md RUN chown -R 1000:1000 /airbyte # Set the entrypoint -ENV AIRBYTE_ENTRYPOINT="python /airbyte/integration_code/main.py" -ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] +ENV AIRBYTE_ENTRYPOINT="source-declarative-manifest-sandboxed" +ENTRYPOINT ["source-declarative-manifest-sandboxed"] USER airbyte From 
642a1c0405a2f4283cb0b40f15d4cdb19e27d4ce Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 12:38:38 -0700 Subject: [PATCH 6/8] rename CLI --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 25e7ac091..f6fb0630b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,7 @@ sql = ["sqlalchemy"] [tool.poetry.scripts] source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:run" -sandboxed-source-declarative-manifest = "airbyte_cdk.cli.source_declarative_manifest:sandboxed_run" +source-declarative-manifest-sandboxed = "airbyte_cdk.cli.source_declarative_manifest:sandboxed_run" [tool.isort] skip = ["__init__.py"] # TODO: Remove after this is fixed: https://github.com/airbytehq/airbyte-python-cdk/issues/12 From f27987ee0c0e63daa35817f6c6ca3dee078bb23a Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 16:24:19 -0700 Subject: [PATCH 7/8] expand usage info --- .../_sandboxed_run.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py index 724c8f4fd..a4a36b0e5 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py @@ -10,21 +10,35 @@ ENV_SANDBOX_MODE_NONE = "NONE" USAGE = f""" -Sandboxed execution of the source-declarative-manifest connector. +------------------------------------------- +-- source-declarative-manifest-sandboxed -- +------------------------------------------- -Usage: source-declarative-manifest-sandboxed [OPTIONS] [CMD] +Sandboxed execution of the source-declarative-manifest connector. By default, this script +wraps the source-declarative-manifest command in Firejail to run the connector in a sandboxed +environment. If Firejail is not available, the connector will run without sandboxing. 
-Options: - --help Show this help message and exit. - --check-sandbox Check Firejail availability and exit. +Environment variable '{ENV_SANDBOX_MODE}' controls the sandboxing behavior. The following values +are supported: + - '{ENV_SANDBOX_MODE_FIREJAIL}': Use Firejail to run the connector in a sandboxed environment. + - '{ENV_SANDBOX_MODE_AUTO}': Use Firejail if available, otherwise run without sandboxing. + - '{ENV_SANDBOX_MODE_NONE}': Disable sandboxing and run the connector without Firejail. + +Usage: source-declarative-manifest-sandboxed [OPTIONS] [CMD] CMD: The command to run in the sandboxed environment. This should be the command that would normally be run to start the connector. E.g. "check", "read", etc. + The command is passed to the source-declarative-manifest entrypoint. - The command is ignored if specifying --check-sandbox or --help. + The command is ignored if specifying any of the below options. + +Options: + --help Show this help message and exit. + --check-sandbox Check Firejail availability and exit. """ + def _wrap_in_sandbox(cmd: list[str]) -> list[str]: """Wrap the given command in Firejail. 
This function modifies the command to include Firejail options From bb0295343931bfc75113d208eb10cc0ba25ac03c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 10 Mar 2025 16:26:00 -0700 Subject: [PATCH 8/8] add more firejail protections, including network restrictions --- .../_sandboxed_run.py | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py index a4a36b0e5..3eae5aa30 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_sandboxed_run.py @@ -39,6 +39,22 @@ """ +def _get_default_gateway() -> str | None: + """Returns the system's default gateway IP, or None if not found.""" + try: + result = subprocess.run( + ["ip", "route"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + for line in result.stdout.split("\n"): + if line.startswith("default via"): + return line.split()[2] # Extract the gateway IP + + except Exception as e: + print(f"❌ Error detecting gateway: {e}") + + return None + + def _wrap_in_sandbox(cmd: list[str]) -> list[str]: """Wrap the given command in Firejail. This function modifies the command to include Firejail options @@ -66,8 +82,50 @@ def _wrap_in_sandbox(cmd: list[str]) -> list[str]: return cmd # Firejail is available + gateway_ip: str | None = _get_default_gateway() + if not gateway_ip: + print("❌ No gateway detected. 
Blocking all egress traffic.") + netfilter_rules = [ + "--netfilter=reject 0.0.0.0/0" # Block all traffic if no gateway is detected + ] + else: + print(f"🔥 Allowing egress only via gateway: {gateway_ip}") + netfilter_rules = [ + # Allow traffic only via the detected gateway: + f"--netfilter=accept {gateway_ip}", + # Block all private networks and localhost: + "--netfilter=reject 10.0.0.0/8", + "--netfilter=reject 172.16.0.0/12", + "--netfilter=reject 192.168.0.0/16", + "--netfilter=reject 127.0.0.0/8", + "--netfilter=reject 169.254.0.0/16", + "--netfilter=reject ::1/128", + "--netfilter=reject fc00::/7", + "--netfilter=reject fe80::/10", + ] + + # Use Google DNS resolution. It is fine to use others but we just don't want to block DNS. + dns_args = [ + "--dns=8.8.8.8", + "--dns=8.8.4.4", + ] + # Firejail Command with DNS resolution allowed + firejail_args = ( + [ + "--private", # Isolates the filesystem (prevents access to user files) + "--net=none", # Ensures restricted networking + "--seccomp", # Enable seccomp-bpf syscall filtering + "--blacklist=/var/run/", # Block system sockets + "--private-tmp", # Creates an isolated temp directory for each process + "--rlimit-cpu=120", # Limits CPU time to 120 seconds (prevents runaway processes) + "--rlimit-fsize=10000000", # Limits max file size to 10 MB (prevents excessive disk writes) + ] + + netfilter_rules + + dns_args + ) + print("Firejail found. Running with Firejail sandboxing.") - return ["firejail", "--private", "--net=none"] + cmd + return ["firejail"] + firejail_args + cmd def sandboxed_run():