diff --git a/.github/workflows/daily-compiler-quality.lock.yml b/.github/workflows/daily-compiler-quality.lock.yml index 97fa0272ce2..c65b1fb5400 100644 --- a/.github/workflows/daily-compiler-quality.lock.yml +++ b/.github/workflows/daily-compiler-quality.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"661b7c3876eeebed5394a5c46311409e084df3c67f4d20c00758cbb6657b90e9","body_hash":"0a85250e3ba307278b526b7978b9f3c6bb2bb37f47e76038509ebe4dd5afed7c","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.59","copilot-sdk":"1.0.0"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"20431832c1e5509ccae28ecd101f573a62bf343cd8cbe4d864266bd5fcbe1558","body_hash":"0a85250e3ba307278b526b7978b9f3c6bb2bb37f47e76038509ebe4dd5afed7c","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.59","copilot-sdk":"1.0.0"}} # gh-aw-manifest: {"version":1,"secrets":["GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.65"},{"image":"ghcr.io/github/gh-
aw-firewall/cli-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.65"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.23","digest":"sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.23@sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"ghcr.io/github/serena-mcp-server:latest","digest":"sha256:bf343399e3725c45528f531a230f3a04521d4cdef29f9a5af6282ff0d3c393c5","pinned_image":"ghcr.io/github/serena-mcp-server:latest@sha256:bf343399e3725c45528f531a230f3a04521d4cdef29f9a5af6282ff0d3c393c5"},{"image":"node:lts-alpine","digest":"sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14","pinned_image":"node:lts-alpine@sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14"}]} # ___ _ _ # / _ \ | | (_) @@ -895,6 +895,42 @@ jobs: - name: Execute GitHub Copilot CLI id: agentic_execution # Copilot CLI tool arguments (sorted): + # --allow-tool github + # --allow-tool safeoutputs + # --allow-tool serena + # --allow-tool shell(bc) + # --allow-tool shell(cat pkg/**/*.go) + # --allow-tool shell(cat) + # --allow-tool shell(date) + # --allow-tool shell(echo) + # --allow-tool shell(find pkg -name "*.go" ! -name "*_test.go" -type f) + # --allow-tool shell(find pkg -type f -name "*.go" ! 
-name "*_test.go") + # --allow-tool shell(find pkg/ -maxdepth 1 -ls) + # --allow-tool shell(find pkg/workflow/ -maxdepth 1 -ls) + # --allow-tool shell(find) + # --allow-tool shell(gh:*) + # --allow-tool shell(git:*) + # --allow-tool shell(grep -r "func " pkg --include="*.go") + # --allow-tool shell(grep) + # --allow-tool shell(head -n * pkg/**/*.go) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(mkdir) + # --allow-tool shell(mv) + # --allow-tool shell(printf) + # --allow-tool shell(pwd) + # --allow-tool shell(safeoutputs:*) + # --allow-tool shell(sed) + # --allow-tool shell(serena:*) + # --allow-tool shell(set) + # --allow-tool shell(sort) + # --allow-tool shell(tail) + # --allow-tool shell(uniq) + # --allow-tool shell(wc -l pkg/**/*.go) + # --allow-tool shell(wc) + # --allow-tool shell(yq) + # --allow-tool write timeout-minutes: 30 run: | set -o pipefail @@ -935,7 +971,7 @@ jobs: COPILOT_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || vars.GH_AW_DEFAULT_MODEL_COPILOT || 'claude-sonnet-4.6' }} COPILOT_SDK_URI: http://127.0.0.1:3002 GH_AW_COPILOT_SDK_DRIVER: 1 - GH_AW_COPILOT_SDK_SERVER_ARGS: '["--headless","--no-auto-update","--port","3002","--add-dir","/tmp/gh-aw/","--log-level","all","--log-dir","/tmp/gh-aw/sandbox/agent/logs/","--disable-builtin-mcps","--no-ask-user","--allow-all-tools","--add-dir","/tmp/gh-aw/cache-memory/","--allow-all-paths"]' + GH_AW_COPILOT_SDK_SERVER_ARGS: '["--headless","--no-auto-update","--port","3002","--add-dir","/tmp/gh-aw/","--log-level","all","--log-dir","/tmp/gh-aw/sandbox/agent/logs/","--disable-builtin-mcps","--no-ask-user","--allow-tool","github","--allow-tool","safeoutputs","--allow-tool","serena","--allow-tool","shell(bc)","--allow-tool","shell(cat pkg/**/*.go)","--allow-tool","shell(cat)","--allow-tool","shell(date)","--allow-tool","shell(echo)","--allow-tool","shell(find pkg -name \"*.go\" ! 
-name \"*_test.go\" -type f)","--allow-tool","shell(find pkg -type f -name \"*.go\" ! -name \"*_test.go\")","--allow-tool","shell(find pkg/ -maxdepth 1 -ls)","--allow-tool","shell(find pkg/workflow/ -maxdepth 1 -ls)","--allow-tool","shell(find)","--allow-tool","shell(gh:*)","--allow-tool","shell(git:*)","--allow-tool","shell(grep -r \"func \" pkg --include=\"*.go\")","--allow-tool","shell(grep)","--allow-tool","shell(head -n * pkg/**/*.go)","--allow-tool","shell(head)","--allow-tool","shell(jq)","--allow-tool","shell(ls)","--allow-tool","shell(mkdir)","--allow-tool","shell(mv)","--allow-tool","shell(printf)","--allow-tool","shell(pwd)","--allow-tool","shell(safeoutputs:*)","--allow-tool","shell(sed)","--allow-tool","shell(serena:*)","--allow-tool","shell(set)","--allow-tool","shell(sort)","--allow-tool","shell(tail)","--allow-tool","shell(uniq)","--allow-tool","shell(wc -l pkg/**/*.go)","--allow-tool","shell(wc)","--allow-tool","shell(yq)","--allow-tool","write","--add-dir","/tmp/gh-aw/cache-memory/","--allow-all-paths"]' GH_AW_MAX_TOOL_DENIALS: 5 GH_AW_MAX_TURNS: ${{ vars.GH_AW_DEFAULT_MAX_TURNS || '' }} GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json diff --git a/.github/workflows/daily-compiler-quality.md b/.github/workflows/daily-compiler-quality.md index 96734378363..55b92f5a725 100644 --- a/.github/workflows/daily-compiler-quality.md +++ b/.github/workflows/daily-compiler-quality.md @@ -62,7 +62,28 @@ name: Daily Compiler Quality Check strict: true timeout-minutes: 30 tools: - bash: true + bash: + - set + - find + - wc + - git + - mkdir + - cat + - jq + - mv + - echo + - bc + - sed + - printf + - date + - grep + - head + - ls + - pwd + - sort + - tail + - uniq + - yq cache-memory: true cli-proxy: true github: diff --git a/actions/setup/js/bash_command_parser.cjs b/actions/setup/js/bash_command_parser.cjs index f48a049567f..bcc2b37cc4f 100644 --- a/actions/setup/js/bash_command_parser.cjs +++ b/actions/setup/js/bash_command_parser.cjs @@ -26,7 +26,7 @@ 
/** * Split a shell command text into individual pipeline segments. - * Splits on the following shell operators: &&, ||, |, ; + * Splits on the following shell operators: &&, ||, |, ; and newlines. * * The split respects: * - Single-quoted strings (no escaping inside) @@ -143,6 +143,39 @@ function splitOnPipelineOperators(commandText) { continue; } + // Newline (sequential) — treat line breaks as command separators, + // except when escaped as a shell line continuation ("\\" + newline). + // Handles LF, CRLF, and CR forms. + if (ch === "\n" || ch === "\r") { + let backslashRunLength = 0; + for (let j = current.length - 1; j >= 0 && current[j] === "\\"; j--) { + backslashRunLength++; + } + + // Odd number of trailing backslashes means the newline is escaped. + if (backslashRunLength % 2 === 1) { + current = current.slice(0, -1); + i++; + if (ch === "\r" && i < len && commandText[i] === "\n") { + i++; + } + while (i < len && (commandText[i] === " " || commandText[i] === "\t")) i++; + if (current && !/\s$/.test(current)) { + current += " "; + } + continue; + } + + segments.push(current); + current = ""; + i++; + if (ch === "\r" && i < len && commandText[i] === "\n") { + i++; + } + while (i < len && /\s/.test(commandText[i])) i++; + continue; + } + current += ch; i++; } @@ -156,13 +189,26 @@ function splitOnPipelineOperators(commandText) { } /** - * Shell flow-control keywords that can appear as the first word of a segment - * but do not represent an executable command. They must be excluded so the - * permission checker does not attempt to look up keywords like "then" or "fi" - * as command names and incorrectly deny (or allow) a pipeline that contains - * them as part of a compound statement (e.g. `if …; then cat …; fi`). + * Clause keywords can prefix an executable command in the same segment + * (for example: "then cat file", "do git log"). These are skipped and + * scanning continues to find the command token. 
*/ -const SHELL_KEYWORDS = new Set(["then", "else", "elif", "fi", "do", "done", "esac", "in", "function", "time", "coproc"]); +const CLAUSE_KEYWORDS = new Set(["then", "else", "elif", "do"]); + +/** + * Structural shell keywords introduce/close control structures; a segment + * starting with one yields no command name. NOTE(review): a command placed + * after "if"/"while"/"until"/"time"/"coproc" (e.g. "if rm x") is not extracted. + */ +const STRUCTURE_KEYWORDS = new Set(["if", "fi", "for", "done", "while", "until", "case", "esac", "select", "in", "function", "time", "coproc"]); + +const SHELL_KEYWORDS = new Set([...CLAUSE_KEYWORDS, ...STRUCTURE_KEYWORDS]); + +// IDENTIFIER=VALUE where VALUE is one of: +// - "(...)" double-quoted text (supports escapes like \") +// - '(...)' single-quoted text +// - an unquoted non-space token +const ENV_ASSIGNMENT_PREFIX_RE = /^[A-Za-z_][A-Za-z0-9_]*=(?:"(?:\\.|[^"\\])*"|'[^']*'|\S*)\s*/; /** * Extract the executable command name from a single shell command segment. @@ -186,9 +232,8 @@ function extractCommandName(segment) { if (!remaining) return null; // Skip leading env-var assignments: IDENTIFIER=anything (repeat) - const envAssignRe = /^[A-Za-z_][A-Za-z0-9_]*=\S*\s*/; for (;;) { - const m = remaining.match(envAssignRe); + const m = remaining.match(ENV_ASSIGNMENT_PREFIX_RE); if (!m) break; remaining = remaining.slice(m[0].length).trim(); } @@ -216,6 +261,11 @@ function extractCommandName(segment) { // Flow-control keywords are not executable commands if (SHELL_KEYWORDS.has(word)) { + if (CLAUSE_KEYWORDS.has(word)) { + remaining = remaining.slice(word.length).trim(); + if (!remaining) return null; + continue; + } return null; } diff --git a/actions/setup/js/bash_command_parser.test.cjs b/actions/setup/js/bash_command_parser.test.cjs index 4fb8f38111e..1fdb6dab434 100644 --- a/actions/setup/js/bash_command_parser.test.cjs +++ b/actions/setup/js/bash_command_parser.test.cjs @@ -105,6 +105,16 @@ describe("splitOnPipelineOperators", () => { 
expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]); }); + it("splits on newlines as sequential separators", () => { + const segments = splitOnPipelineOperators("pwd\nls -la\nsafeoutputs --help"); + expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]); + }); + + it("does not split on escaped newline continuations", () => { + const segments = splitOnPipelineOperators("git log \\\n --oneline \\\n --max-count=1"); + expect(segments).toEqual(["git log --oneline --max-count=1"]); + }); + it("trims leading/trailing whitespace from each segment", () => { const segments = splitOnPipelineOperators(" ls /tmp && cat file "); expect(segments[0]).toBe("ls /tmp"); @@ -137,6 +147,18 @@ describe("extractCommandName", () => { expect(extractCommandName("FOO=bar BAZ=qux echo hi")).toBe("echo"); }); + it("skips leading env-var assignment with quoted spaces", () => { + expect(extractCommandName("FILES='a b c' echo hi")).toBe("echo"); + }); + + it("skips leading env-var assignment with double-quoted spaces", () => { + expect(extractCommandName('FILES="a b c" echo hi')).toBe("echo"); + }); + + it("skips leading env-var assignment with escaped quote in double-quoted value", () => { + expect(extractCommandName('FILES="a \\"b\\" c" echo hi')).toBe("echo"); + }); + it("handles negation operator ! and returns next command", () => { expect(extractCommandName("! 
ls /tmp")).toBe("ls"); }); @@ -153,10 +175,30 @@ describe("extractCommandName", () => { expect(extractCommandName("else")).toBeNull(); }); + it("extracts command after shell keyword 'else'", () => { + expect(extractCommandName("else cat file")).toBe("cat"); + }); + it("returns null for shell keyword 'fi'", () => { expect(extractCommandName("fi")).toBeNull(); }); + it("extracts command after shell keyword 'elif'", () => { + expect(extractCommandName("elif grep x file")).toBe("grep"); + }); + + it("returns null for shell keyword 'if'", () => { + expect(extractCommandName("if [ -f file ]")).toBeNull(); + }); + + it("returns null for shell keyword 'for'", () => { + expect(extractCommandName("for f in a b c")).toBeNull(); + }); + + it("extracts command after shell keyword 'do'", () => { + expect(extractCommandName("do git status")).toBe("git"); + }); + it("returns null for a bare redirection like >file", () => { expect(extractCommandName(">file.txt")).toBeNull(); }); @@ -280,6 +322,26 @@ describe("extractCommandNamesFromPipeline", () => { it("handles date with flags", () => { expect(extractCommandNamesFromPipeline("date +%Y-%m-%d && echo done")).toEqual(["date", "echo"]); }); + + it("extracts all command names from multiline script with variables and control flow", () => { + const cmd = `set -euo pipefail +CACHE_DIR='cache/gh-aw/cache-memory/compiler-quality' +ANALYSES_DIR="$CACHE_DIR/analyses" +mkdir -p "$ANALYSES_DIR" +FILES='compiler.go compiler_activation_jobs.go compiler_orchestrator.go compiler_jobs.go compiler_safe_outputs.go compiler_safe_outputs_config.go compiler_safe_outputs_job.go compiler_yaml.go compiler_yaml_main_job.go' +for f in $FILES; do git -C /home/runner/work/gh-aw/gh-aw log -1 --format='%H' -- "pkg/workflow/$f" | sed "s|^|$f |"; done +printf '---ROTATION---\n' +if [ -f "$CACHE_DIR/rotation.json" ]; then cat "$CACHE_DIR/rotation.json"; fi +printf '\n---HASHES---\n' +if [ -f "$CACHE_DIR/file-hashes.json" ]; then cat "$CACHE_DIR/file-hashes.json"; fi 
+printf '\n---FILES---\n' +for f in $FILES; do wc -l "/home/runner/work/gh-aw/gh-aw/pkg/workflow/$f"; done`; + expect(extractCommandNamesFromPipeline(cmd)).toEqual(["set", "mkdir", "git", "sed", "printf", "cat", "wc"]); + }); + + it("keeps continued multiline command as one extracted command", () => { + expect(extractCommandNamesFromPipeline("git log \\\n --oneline \\\n --max-count=1")).toEqual(["git"]); + }); }); // ───────────────────────────────────────────────────────────────────────────── @@ -351,7 +413,7 @@ describe("extractCommandName – extensive vectors", () => { { id: "BP-EC-004", segment: "2>&1", expected: null }, { id: "BP-EC-005", segment: ">out.txt", expected: null }, { id: "BP-EC-006", segment: "A=1 B=2 safeoutputs missing_data", expected: "safeoutputs" }, - { id: "BP-EC-007", segment: "then cat file", expected: null }, + { id: "BP-EC-007", segment: "then cat file", expected: "cat" }, { id: "BP-EC-008", segment: "fi", expected: null }, { id: "BP-EC-009", segment: "do", expected: null }, { id: "BP-EC-010", segment: "done", expected: null }, diff --git a/actions/setup/js/bash_command_parser_spec_vectors.json b/actions/setup/js/bash_command_parser_spec_vectors.json index 897a52c6af6..8b07783c1bc 100644 --- a/actions/setup/js/bash_command_parser_spec_vectors.json +++ b/actions/setup/js/bash_command_parser_spec_vectors.json @@ -1,5 +1,5 @@ { - "version": "1.0.0", + "version": "1.1.0", "metadata": { "spec": "docs/src/content/docs/specs/bash-command-parser-specification.md", "description": "Language-agnostic conformance vectors for the bash command parser", @@ -54,6 +54,18 @@ "source": "verification", "input": " ! ls /tmp && echo done ", "expected": ["! 
ls /tmp", "echo done"] + }, + { + "id": "BP-SP-153", + "source": "verification", + "input": "pwd\nls -la\nsafeoutputs --help", + "expected": ["pwd", "ls -la", "safeoutputs --help"] + }, + { + "id": "BP-SP-154", + "source": "verification", + "input": "git log \\\n --oneline \\\n --max-count=1", + "expected": ["git log --oneline --max-count=1"] } ], "extractCommandName": [ @@ -85,7 +97,7 @@ "id": "BP-EC-105", "source": "model-based", "input": "then cat file", - "expected": null + "expected": "cat" }, { "id": "BP-EC-151", @@ -104,6 +116,18 @@ "source": "verification", "input": "coproc", "expected": null + }, + { + "id": "BP-EC-154", + "source": "verification", + "input": "FILES='a b c' echo hi", + "expected": "echo" + }, + { + "id": "BP-EC-155", + "source": "verification", + "input": "FILES=\"a \\\"b\\\" c\" echo hi", + "expected": "echo" } ], "extractCommandNamesFromPipeline": [ @@ -148,6 +172,19 @@ "source": "verification", "input": "cat $(ls /tmp)", "expected": ["cat"] + }, + { + "id": "BP-EP-154", + "source": "verification", + "note": "Multiline control-flow script fixture to verify extraction of set/mkdir/git/sed/printf/cat/wc across for/if blocks.", + "input": "set -euo pipefail\nmkdir -p \"$ANALYSES_DIR\"\nfor f in $FILES; do git -C \"$REPO\" log -1 -- \"$f\" | sed \"s|^|$f |\"; done\nprintf '---ROTATION---\\n'\nif [ -f \"$CACHE_DIR/rotation.json\" ]; then cat \"$CACHE_DIR/rotation.json\"; fi\nfor f in $FILES; do wc -l \"$f\"; done", + "expected": ["set", "mkdir", "git", "sed", "printf", "cat", "wc"] + }, + { + "id": "BP-EP-155", + "source": "verification", + "input": "git log \\\n --oneline \\\n --max-count=1", + "expected": ["git"] } ] }, diff --git a/actions/setup/js/copilot_sdk_driver.test.cjs b/actions/setup/js/copilot_sdk_driver.test.cjs index 35472af02f2..756e6833905 100644 --- a/actions/setup/js/copilot_sdk_driver.test.cjs +++ b/actions/setup/js/copilot_sdk_driver.test.cjs @@ -844,5 +844,67 @@ describe("copilot_sdk_driver.cjs", () => { }); 
expect(result).toEqual({ kind: "approve-once" }); }); + + it("denies multiline shell command when required tools are missing", async () => { + const handler = await makePermissionHandlerViaSDK(["shell(mkdir)", "shell(git:*)", "shell(printf)", "shell(cat)", "shell(wc)"]); + const result = handler({ + kind: "shell", + commands: [], + fullCommandText: `set -euo pipefail +CACHE_DIR='cache/gh-aw/cache-memory/compiler-quality' +ANALYSES_DIR="$CACHE_DIR/analyses" +mkdir -p "$ANALYSES_DIR" +FILES='compiler.go compiler_activation_jobs.go compiler_orchestrator.go compiler_jobs.go compiler_safe_outputs.go compiler_safe_outputs_config.go compiler_safe_outputs_job.go compiler_yaml.go compiler_yaml_main_job.go' +for f in $FILES; do git -C /home/runner/work/gh-aw/gh-aw log -1 --format='%H' -- "pkg/workflow/$f" | sed "s|^|$f |"; done +printf '---ROTATION---\n' +if [ -f "$CACHE_DIR/rotation.json" ]; then cat "$CACHE_DIR/rotation.json"; fi +printf '\n---HASHES---\n' +if [ -f "$CACHE_DIR/file-hashes.json" ]; then cat "$CACHE_DIR/file-hashes.json"; fi +printf '\n---FILES---\n' +for f in $FILES; do wc -l "/home/runner/work/gh-aw/gh-aw/pkg/workflow/$f"; done`, + }); + expect(result).toEqual({ kind: "reject", feedback: "Tool invocation is not allowed by workflow tool permissions." 
}); + }); + + it("approves multiline shell command when all required tools are permitted", async () => { + const handler = await makePermissionHandlerViaSDK(["shell(set)", "shell(mkdir)", "shell(git:*)", "shell(sed)", "shell(printf)", "shell(cat)", "shell(wc)"]); + const result = handler({ + kind: "shell", + commands: [], + fullCommandText: `set -euo pipefail +CACHE_DIR='cache/gh-aw/cache-memory/compiler-quality' +ANALYSES_DIR="$CACHE_DIR/analyses" +mkdir -p "$ANALYSES_DIR" +FILES='compiler.go compiler_activation_jobs.go compiler_orchestrator.go compiler_jobs.go compiler_safe_outputs.go compiler_safe_outputs_config.go compiler_safe_outputs_job.go compiler_yaml.go compiler_yaml_main_job.go' +for f in $FILES; do git -C /home/runner/work/gh-aw/gh-aw log -1 --format='%H' -- "pkg/workflow/$f" | sed "s|^|$f |"; done +printf '---ROTATION---\n' +if [ -f "$CACHE_DIR/rotation.json" ]; then cat "$CACHE_DIR/rotation.json"; fi +printf '\n---HASHES---\n' +if [ -f "$CACHE_DIR/file-hashes.json" ]; then cat "$CACHE_DIR/file-hashes.json"; fi +printf '\n---FILES---\n' +for f in $FILES; do wc -l "/home/runner/work/gh-aw/gh-aw/pkg/workflow/$f"; done`, + }); + expect(result).toEqual({ kind: "approve-once" }); + }); + + it("requires explicit read permission for AGENTS.md and SKILL.md reads", async () => { + const denied = await makePermissionHandlerViaSDK(["shell(ls)"]); + expect(denied({ kind: "read", path: "/home/runner/work/gh-aw/gh-aw/AGENTS.md" })).toEqual({ + kind: "reject", + feedback: "Tool invocation is not allowed by workflow tool permissions.", + }); + expect(denied({ kind: "read", path: "/home/runner/work/gh-aw/gh-aw/SKILL.md" })).toEqual({ + kind: "reject", + feedback: "Tool invocation is not allowed by workflow tool permissions.", + }); + + const allowed = await makePermissionHandlerViaSDK(["read"]); + expect(allowed({ kind: "read", path: "/home/runner/work/gh-aw/gh-aw/AGENTS.md" })).toEqual({ + kind: "approve-once", + }); + expect(allowed({ kind: "read", path: 
"/home/runner/work/gh-aw/gh-aw/SKILL.md" })).toEqual({ + kind: "approve-once", + }); + }); }); }); diff --git a/docs/src/content/docs/specs/bash-command-parser-specification.md b/docs/src/content/docs/specs/bash-command-parser-specification.md index 393238b3381..9a2b2b8fea5 100644 --- a/docs/src/content/docs/specs/bash-command-parser-specification.md +++ b/docs/src/content/docs/specs/bash-command-parser-specification.md @@ -7,7 +7,7 @@ sidebar: # Bash Command Parser Specification -**Version**: 1.0.0 +**Version**: 1.1.0 **Status**: Draft Specification **Latest Version**: [bash-command-parser-specification](/gh-aw/specs/bash-command-parser-specification/) **Editors**: GitHub Agentic Workflows Team @@ -49,7 +49,7 @@ The parser is a lightweight recognizer for shell command identifiers in chained/ This specification defines: -- splitting on `&&`, `||`, `|`, `;` +- splitting on `&&`, `||`, `|`, `;`, and top-level line breaks - quote/subshell shielding during splitting - executable token extraction from a segment - deduplicated name extraction from pipeline text @@ -103,7 +103,8 @@ The grammar below is recognition-oriented and intentionally limited to parser be ```ebnf command_text = { unit } ; unit = single_quoted | double_quoted | subshell | operator | other ; -operator = "&&" | "||" | "|" | ";" ; +operator = "&&" | "||" | "|" | ";" | newline ; +newline = "\n" | "\r\n" | "\r" ; single_quoted = "'" , { ? any char except "'" ? } , [ "'" ] ; double_quoted = '"' , { dq_char | escape } , [ '"' ] ; @@ -124,14 +125,21 @@ The optional closing quote in `single_quoted` and `double_quoted` is intentional ```ebnf segment = ws , { env_assign , ws } , core ; -env_assign = ident , "=" , nonspace* ; +env_assign = ident , "=" , env_value ; +env_value = dq_value | sq_value | nonspace* ; +dq_value = '"' , { dq_char | escape } , [ '"' ] ; +sq_value = "'" , { ? any char except "'" ? 
} , [ "'" ] ; ident = ("_" | letter) , { "_" | letter | digit } ; core = negation | brace | keyword | redirection | word | empty ; negation = "!" , ws , core ; brace = ("{" | "}") , ws , core ; -keyword = "then" | "else" | "elif" | "fi" | "do" | "done" - | "esac" | "in" | "function" | "time" | "coproc" ; +keyword = clause_keyword | structural_keyword ; +clause_keyword = "then" | "else" | "elif" | "do" ; +structural_keyword + = "if" | "fi" | "for" | "done" | "while" | "until" + | "case" | "esac" | "select" | "in" + | "function" | "time" | "coproc" ; redirection = ("<" | ">") , nonspace* | digits , ("<" | ">" | "&") , nonspace* ; word = nonspace , nonspace* ; @@ -154,7 +162,7 @@ letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" ### 4.1 `splitOnPipelineOperators(commandText)` 1. Non-string or empty/falsy input MUST return `[]`. -2. The implementation MUST split at top-level operators `&&`, `||`, `|`, and `;`. +2. The implementation MUST split at top-level operators `&&`, `||`, `|`, `;`, and line breaks (`\n`, `\r\n`, `\r`), except escaped line continuations (an odd-length run of `\` immediately before the line break). 3. Operators inside single quotes, double quotes, or `$(` `)` regions MUST NOT split. 4. Output segments MUST be trimmed. 5. Empty segments MUST be removed. @@ -163,11 +171,12 @@ letter = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" ### 4.2 `extractCommandName(segment)` 1. Non-string or blank segment MUST return `null`. -2. Leading environment assignments (`IDENTIFIER=\S*`) MUST be stripped repeatedly. +2. Leading environment assignments (`IDENTIFIER=<value>`) MUST be stripped repeatedly, where `<value>` MAY be unquoted, single-quoted, or double-quoted (including escaped content in double quotes). 3. If the first token is redirection (`^[<>]` or `^\d+[<>&]`), return `null`. 4. If the first token is `!`, `{`, or `}`, extraction MUST recurse on the remainder. -5. 
If the first token is a shell keyword (`then`, `else`, `elif`, `fi`, `do`, `done`, `esac`, `in`, `function`, `time`, `coproc`), return `null`. -6. Otherwise return the first token. +5. If the first token is a clause keyword (`then`, `else`, `elif`, `do`), extraction MUST continue scanning on the remainder. +6. If the first token is a structural keyword (`if`, `fi`, `for`, `done`, `while`, `until`, `case`, `esac`, `select`, `in`, `function`, `time`, `coproc`), return `null`. +7. Otherwise return the first token. ### 4.3 `extractCommandNamesFromPipeline(commandText)` @@ -206,7 +215,7 @@ Implementations SHOULD consume machine-readable vectors and run identical assert A minimal suite MUST include all categories below: - **S-CORE (4 tests)**: - 1. top-level split on each operator `&&`, `||`, `|`, `;` + 1. top-level split on each operator `&&`, `||`, `|`, `;`, and newline (excluding escaped line continuations) 2. no split when operators occur in single quotes 3. no split when operators occur in double quotes 4. no split when operators occur inside `$(` `)` @@ -214,7 +223,7 @@ A minimal suite MUST include all categories below: 1. simple word returns command name 2. leading environment assignments are stripped 3. redirection-first segment returns null/none - 4. keyword-first segment returns null/none + 4. clause keywords continue extraction while structural keywords return null/none 5. recursive skip for `!`, `{`, and `}` - **P-CORE (4 tests)**: 1. split + extract composition over multi-operator pipeline