From b44c02f3983a503e1e8552cd3f6552f4bced4514 Mon Sep 17 00:00:00 2001 From: Tester Date: Fri, 3 Jul 2026 16:48:23 +0200 Subject: [PATCH] feat(sandbox): run gh outside the sandbox so keyring auth works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gh authenticates via the OS keyring, which the in-process sandbox blocks, so gh commands failed with keyring / "not logged in" errors. Excluding gh from the sandbox lets it use the real host auth. Confirmation is safe-by-default: permissions.ask matches Bash(gh *) so every gh command prompts unless a more-specific read-only allow rule (gh pr view, gh * list, ...) exempts it — so every destructive or unknown gh subcommand is confirmed. gh auth token / gh auth refresh stay denied so the token cannot be dumped. Generated-by: Claude Code (Opus 4.8) --- .claude/settings.json | 48 +++++++++---------- docs/setup/secure-agent-setup.md | 41 ++++++++++++---- tools/sandbox-lint/README.md | 3 +- tools/sandbox-lint/expected.json | 48 +++++++++---------- .../sandbox-lint/src/sandbox_lint/__init__.py | 1 + tools/sandbox-lint/tests/test_validator.py | 23 +++++++++ .../specs/agent-isolation-sandbox.md | 26 ++++++---- 7 files changed, 123 insertions(+), 67 deletions(-) diff --git a/.claude/settings.json b/.claude/settings.json index 942a5bf6..29097932 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -2,6 +2,9 @@ "$schema": "https://json.schemastore.org/claude-code-settings.json", "sandbox": { "enabled": true, + "excludedCommands": [ + "gh *" + ], "filesystem": { "denyRead": [ "~/" @@ -65,6 +68,26 @@ "permissions": { "allow": [ "Bash(gh api graphql *)", + "Bash(gh pr view *)", + "Bash(gh pr list *)", + "Bash(gh pr diff *)", + "Bash(gh pr checks *)", + "Bash(gh issue view *)", + "Bash(gh issue list *)", + "Bash(gh repo view *)", + "Bash(gh repo list *)", + "Bash(gh run view *)", + "Bash(gh run list *)", + "Bash(gh run watch *)", + "Bash(gh workflow view *)", + "Bash(gh workflow 
list *)", + "Bash(gh release view *)", + "Bash(gh release list *)", + "Bash(gh label list *)", + "Bash(gh cache list *)", + "Bash(gh search *)", + "Bash(gh browse *)", + "Bash(gh auth status*)", "mcp__claude_ai_Gmail__get_thread", "mcp__claude_ai_Gmail__search_threads", "mcp__ponymail__search_list", @@ -113,30 +136,7 @@ "Bash(git push *)", "Bash(git push --force *)", "Bash(git push --force-with-lease *)", - "Bash(gh pr create *)", - "Bash(gh pr edit *)", - "Bash(gh pr merge *)", - "Bash(gh issue create *)", - "Bash(gh issue edit *)", - "Bash(gh issue close *)", - "Bash(gh issue comment *)", - "Bash(gh release create *)", - "Bash(gh api * -X *)", - "Bash(gh api * -f *)", - "Bash(gh api * -F *)", - "Bash(gh gist *)", - "Bash(gh repo create *)", - "Bash(gh repo edit *)", - "Bash(gh repo delete *)", - "Bash(gh api * --method *)", - "Bash(gh api --method *)", - "Bash(gh api * --input *)", - "Bash(gh api --input *)", - "Bash(gh secret *)", - "Bash(gh ssh-key *)", - "Bash(gh release upload *)", - "Bash(gh release delete *)", - "Bash(gh workflow run *)" + "Bash(gh *)" ] } } diff --git a/docs/setup/secure-agent-setup.md b/docs/setup/secure-agent-setup.md index 4e60dcb5..14b2a345 100644 --- a/docs/setup/secure-agent-setup.md +++ b/docs/setup/secure-agent-setup.md @@ -365,6 +365,16 @@ below, annotated. { "sandbox": { "enabled": true, + // `excludedCommands` runs the listed commands OUTSIDE the sandbox. + // `gh` authenticates via the OS keyring (and `~/.config/gh`), which the + // sandbox blocks — so a sandboxed `gh` fails with a keyring / "not + // logged in" error. Excluding it lets `gh` use the real host auth. Its + // write / destructive subcommands are still gated by the + // `permissions.ask` rules below, and `gh auth token` / `gh auth refresh` + // stay in `permissions.deny` so the token cannot be dumped or rotated through them. Excluding `gh` is + // what makes the "`gh` is sandbox-bypassed" note under `credentials` + // below actually hold. NOTE(review): `gh auth status --show-token` also prints the token and is matched by the `gh auth status*` allow rule in `permissions.allow` — confirm whether it needs its own deny entry.
+ "excludedCommands": ["gh *"], // The `lychee` link-check hook runs in OFFLINE mode (`offline = // true` in `.lychee.toml`): it validates only local cross-file and // anchor references and never fetches remote URLs, so it makes no @@ -456,7 +466,18 @@ below, annotated. }, "permissions": { "allow": [ - "Bash(gh api graphql *)" // read-only GraphQL fetches (PR-triage paginated fetch loop, similar bulk reads); MORE SPECIFIC than the `-F`/`-f` ask rules below, so it short-circuits them. Mutations via `gh api graphql -F query='mutation {...}'` slip through this rule and are not prompted — accept this trade-off because the skills in this framework do not route mutations through graphql (REST + explicit `-X`/`--method` is the mutation path). + "Bash(gh api graphql *)", // read-only GraphQL fetches (PR-triage paginated loop). MORE SPECIFIC than the `gh *` ask below, so it — and the read-only rules that follow — run WITHOUT a prompt. GraphQL mutations slip through; accepted, since the skills route mutations through REST, not graphql. + // Read-only gh, allow-listed so they don't trip the `gh *` ask below. + // Anything NOT listed here — every write/destructive gh, and REST `gh + // api` (GET included) — falls through to `gh *` and prompts. + "Bash(gh pr view *)", "Bash(gh pr list *)", "Bash(gh pr diff *)", "Bash(gh pr checks *)", + "Bash(gh issue view *)", "Bash(gh issue list *)", + "Bash(gh repo view *)", "Bash(gh repo list *)", + "Bash(gh run view *)", "Bash(gh run list *)", "Bash(gh run watch *)", + "Bash(gh workflow view *)", "Bash(gh workflow list *)", + "Bash(gh release view *)", "Bash(gh release list *)", + "Bash(gh label list *)", "Bash(gh cache list *)", + "Bash(gh search *)", "Bash(gh browse *)", "Bash(gh auth status*)" ], "deny": [ "Read(~/.aws/**)", "Read(~/.ssh/**)", "Read(~/.netrc)", @@ -468,16 +489,12 @@ below, annotated. 
"Bash(curl *)", "Bash(wget *)", // network egress via Bash bypasses the sandbox proxy "Bash(aws *)", "Bash(gcloud *)", "Bash(az *)", "Bash(kubectl *)", "Bash(docker login *)", "Bash(npm publish *)", - "Bash(pip install --upgrade *)", "Bash(uv self update *)" + "Bash(pip install --upgrade *)", "Bash(uv self update *)", + "Bash(gh auth token*)", "Bash(gh auth refresh*)" // gh runs unsandboxed (excludedCommands), so deny the two subcommands that would print/rotate the token ], "ask": [ "Bash(git push *)", // including --force / --force-with-lease variants - "Bash(gh pr create *)", "Bash(gh pr edit *)", "Bash(gh pr merge *)", - "Bash(gh issue create *)", "Bash(gh issue edit *)", - "Bash(gh issue close *)", "Bash(gh issue comment *)", - "Bash(gh release create *)", - "Bash(gh api * -X *)", // any non-default-method API call - "Bash(gh api * -f *)", "Bash(gh api * -F *)" // any payload-bearing API call — narrowed by the `gh api graphql *` allow above for the GraphQL read path + "Bash(gh *)" // safe-by-default: EVERY gh command prompts unless it matches a more-specific read-only allow rule above. Guarantees every destructive / unknown gh subcommand (gh pr close, gh run delete, gh label delete, gh repo archive, gh variable set, gh project item-delete, …) is confirmed. `gh auth token`/`refresh` are denied above (deny > ask). ] } } @@ -490,6 +507,14 @@ agent should never *see* it. `sandbox.filesystem.allowRead` permits the bash subprocess to read the file; `permissions.deny[Read(...)]` blocks the agent's Read tool from reading the same path. +**OpenCode parity.** OpenCode has no per-command sandbox exclusion — its +isolation is the OS-level sandbox of the [clean-env wrapper](#the-clean-env-wrapper), +which already runs `gh` with the host keyring, so there is no `excludedCommands` +equivalent to add. 
The "always confirm" half carries over via OpenCode's own +policy: `sandbox-lint --opencode` requires `permission.bash` to default to +`ask`/`deny` (never a blanket `allow`), so `gh` write subcommands prompt there +by default. No `opencode.json` change is needed to match the Claude config. + ## Project-root coverage in the sandbox allowlists The `.` entry in `sandbox.filesystem.allowRead` is **intended** to diff --git a/tools/sandbox-lint/README.md b/tools/sandbox-lint/README.md index 2d60008b..67db5bb8 100644 --- a/tools/sandbox-lint/README.md +++ b/tools/sandbox-lint/README.md @@ -55,7 +55,8 @@ uv run --project tools/sandbox-lint sandbox-lint --opencode opencode.json 1. **Baseline parity.** Every key/value in the live settings file must match the baseline. Lists tagged as set-typed (`denyRead`, - `allowRead`, `allowWrite`, `allowedDomains`, `deny`, `ask`) are + `allowRead`, `allowWrite`, `allowedDomains`, `excludedCommands`, + `deny`, `ask`) are compared as sets so a re-order does not trip the lint, but every addition or removal does. Any drift fails CI. 2. 
**Hard invariants.** Independent of the baseline, the live diff --git a/tools/sandbox-lint/expected.json b/tools/sandbox-lint/expected.json index 942a5bf6..29097932 100644 --- a/tools/sandbox-lint/expected.json +++ b/tools/sandbox-lint/expected.json @@ -2,6 +2,9 @@ "$schema": "https://json.schemastore.org/claude-code-settings.json", "sandbox": { "enabled": true, + "excludedCommands": [ + "gh *" + ], "filesystem": { "denyRead": [ "~/" @@ -65,6 +68,26 @@ "permissions": { "allow": [ "Bash(gh api graphql *)", + "Bash(gh pr view *)", + "Bash(gh pr list *)", + "Bash(gh pr diff *)", + "Bash(gh pr checks *)", + "Bash(gh issue view *)", + "Bash(gh issue list *)", + "Bash(gh repo view *)", + "Bash(gh repo list *)", + "Bash(gh run view *)", + "Bash(gh run list *)", + "Bash(gh run watch *)", + "Bash(gh workflow view *)", + "Bash(gh workflow list *)", + "Bash(gh release view *)", + "Bash(gh release list *)", + "Bash(gh label list *)", + "Bash(gh cache list *)", + "Bash(gh search *)", + "Bash(gh browse *)", + "Bash(gh auth status*)", "mcp__claude_ai_Gmail__get_thread", "mcp__claude_ai_Gmail__search_threads", "mcp__ponymail__search_list", @@ -113,30 +136,7 @@ "Bash(git push *)", "Bash(git push --force *)", "Bash(git push --force-with-lease *)", - "Bash(gh pr create *)", - "Bash(gh pr edit *)", - "Bash(gh pr merge *)", - "Bash(gh issue create *)", - "Bash(gh issue edit *)", - "Bash(gh issue close *)", - "Bash(gh issue comment *)", - "Bash(gh release create *)", - "Bash(gh api * -X *)", - "Bash(gh api * -f *)", - "Bash(gh api * -F *)", - "Bash(gh gist *)", - "Bash(gh repo create *)", - "Bash(gh repo edit *)", - "Bash(gh repo delete *)", - "Bash(gh api * --method *)", - "Bash(gh api --method *)", - "Bash(gh api * --input *)", - "Bash(gh api --input *)", - "Bash(gh secret *)", - "Bash(gh ssh-key *)", - "Bash(gh release upload *)", - "Bash(gh release delete *)", - "Bash(gh workflow run *)" + "Bash(gh *)" ] } } diff --git a/tools/sandbox-lint/src/sandbox_lint/__init__.py 
b/tools/sandbox-lint/src/sandbox_lint/__init__.py index 635d2839..611d8f2e 100644 --- a/tools/sandbox-lint/src/sandbox_lint/__init__.py +++ b/tools/sandbox-lint/src/sandbox_lint/__init__.py @@ -146,6 +146,7 @@ def _normalised_set(paths: list[str]) -> set[str]: "allowRead", "allowWrite", "allowedDomains", + "excludedCommands", "deny", "ask", } diff --git a/tools/sandbox-lint/tests/test_validator.py b/tools/sandbox-lint/tests/test_validator.py index 1521ff4f..79166483 100644 --- a/tools/sandbox-lint/tests/test_validator.py +++ b/tools/sandbox-lint/tests/test_validator.py @@ -69,6 +69,20 @@ def test_live_settings_satisfy_invariants(live_settings: dict[str, Any]) -> None assert errors == [], "live settings violate invariants:\n" + "\n".join(errors) +def test_baseline_excludes_gh_from_sandbox(baseline: dict[str, Any]) -> None: + # gh authenticates via the OS keyring, which is unreachable inside the + # sandbox; excluding it lets gh run against the real host auth. The `ask` + # rules still gate its write/destructive subcommands. + assert "gh *" in baseline["sandbox"].get("excludedCommands", []) + + +def test_baseline_asks_on_all_gh_by_default(baseline: dict[str, Any]) -> None: + # Safe-by-default: every gh command prompts unless a more-specific + # read-only allow rule exempts it, so destructive/unknown gh always asks. 
+ assert "Bash(gh *)" in baseline["permissions"]["ask"] + assert "Bash(gh pr view *)" in baseline["permissions"]["allow"] + + def test_main_exits_zero_on_repo(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.chdir(REPO_ROOT) assert main([]) == 0 @@ -94,6 +108,15 @@ def test_diff_detects_added_allowed_domain(baseline: dict[str, Any]) -> None: assert any("sandbox-lint-test-extra-marker" in d for d in diffs) +def test_diff_excluded_commands_order_insensitive(baseline: dict[str, Any]) -> None: + settings = copy.deepcopy(baseline) + settings["sandbox"]["excludedCommands"] = [ + *reversed(settings["sandbox"]["excludedCommands"]), + *settings["sandbox"]["excludedCommands"], + ] + assert deep_diff(settings, baseline) == [] + + def test_diff_detects_removed_deny_entry(baseline: dict[str, Any]) -> None: settings = copy.deepcopy(baseline) settings["permissions"]["deny"].remove("Bash(curl *)") diff --git a/tools/spec-loop/specs/agent-isolation-sandbox.md b/tools/spec-loop/specs/agent-isolation-sandbox.md index 057dc2bb..b63e8a91 100644 --- a/tools/spec-loop/specs/agent-isolation-sandbox.md +++ b/tools/spec-loop/specs/agent-isolation-sandbox.md @@ -15,8 +15,9 @@ acceptance: - Every agent subprocess runs inside an OS-level sandbox with default- deny filesystem reads and network egress. - Credential-shaped env vars are stripped before the agent execs. - - State-mutating shell calls (git push, gh pr create, …) require a - confirmation prompt; secrets/cred files are deny-read. + - State-mutating shell calls (git push, and every gh command except + allow-listed read-only ones) require a confirmation prompt; + secrets/cred files are deny-read. --- # Agent isolation / layered sandbox @@ -33,8 +34,8 @@ saying "no". - `tools/agent-isolation/` — the harness (clean-env wrapper + sandbox profiles). - `.claude/settings.json` — the `sandbox` block (filesystem - allow/deny, network `allowedDomains`) and `permissions` (`deny` / - `ask`). 
+ allow/deny, network `allowedDomains`, `excludedCommands`) and + `permissions` (`deny` / `ask`). - Skills: `setup-isolated-setup-install`, `-update`, `-verify`, `-doctor` (probes live sandbox restrictions — SSH-agent reachability, localhost port binding, docker/podman socket — and maps each to a @@ -50,11 +51,16 @@ The reference model is four layers, layered: `$ANTHROPIC_API_KEY` leakage). 2. **Filesystem + network sandbox** — Linux `bubblewrap` + `socat` SNI proxy; macOS `sandbox-exec`. Default-deny reads outside the tree and - egress to non-allowed hosts. + egress to non-allowed hosts. `sandbox.excludedCommands` carves out + commands that need host auth the sandbox blocks — `gh` (OS keyring); + the blast radius is held by layers 3 (`gh auth token` / `gh auth + refresh` denied) and 4 (`gh` writes gated by `ask`). 3. **Tool permissions** — the host's `permissions.deny` blocks denied paths/binaries (`Read(~/.ssh/**)`, `Bash(curl *)`, …). -4. **Forced confirmation** — `permissions.ask` on every state-mutating - shell call (`git push`, `gh pr create`, `gh issue edit`, …). +4. **Forced confirmation** — `permissions.ask` on `git push` and, + safe-by-default, on `Bash(gh *)`: every `gh` command prompts unless a + more-specific read-only `allow` rule (`gh pr view`, `gh * list`, …) + exempts it, so every destructive or unknown `gh` subcommand confirms. Pinned system tools (`bubblewrap`, `socat`, agent CLI) are aged through a cooldown window; bumps are PRs, not silent updates. @@ -69,8 +75,8 @@ cooldown window; bumps are PRs, not silent updates. 1. Filesystem and network default-deny with explicit allow-lists. 2. The clean-env wrapper strips credential-shaped vars before exec. -3. `git push` / `gh pr create` are in `permissions.ask`; secret/cred - files are in `permissions.deny`. +3. `git push` and `Bash(gh *)` are in `permissions.ask` (read-only `gh` + exempted via `allow`); secret/cred files are in `permissions.deny`. 
## Validation @@ -78,7 +84,7 @@ cooldown window; bumps are PRs, not silent updates. uv run --project tools/agent-isolation --group dev pytest python3 -c "import json,sys; s=json.load(open('.claude/settings.json')); \ asks=' '.join(s['permissions']['ask']); \ - sys.exit(0 if 'git push' in asks and 'gh pr create *' in asks else 1)" + sys.exit(0 if 'git push' in asks and 'gh *' in asks else 1)" ``` ## Known gaps