-
Notifications
You must be signed in to change notification settings - Fork 424
Fix Copilot SDK tool-permission parsing for multiline shell scripts and restore least-privilege workflow allowlist #37487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b800b5e
8995bae
222c9d9
3346551
43d8c96
4030618
e576479
ccd49e0
693c322
e959397
5f943a0
a275d78
3fc3c35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,7 +26,7 @@ | |
|
|
||
| /** | ||
| * Split a shell command text into individual pipeline segments. | ||
| * Splits on the following shell operators: &&, ||, |, ; | ||
| * Splits on the following shell operators: &&, ||, |, ; and newlines. | ||
| * | ||
| * The split respects: | ||
| * - Single-quoted strings (no escaping inside) | ||
|
|
@@ -143,6 +143,39 @@ function splitOnPipelineOperators(commandText) { | |
| continue; | ||
| } | ||
|
|
||
| // Newline (sequential) — treat line breaks as command separators, | ||
| // except when escaped as a shell line continuation ("\\" + newline). | ||
| // Handles LF, CRLF, and CR forms. | ||
| if (ch === "\n" || ch === "\r") { | ||
| let backslashRunLength = 0; | ||
| for (let j = current.length - 1; j >= 0 && current[j] === "\\"; j--) { | ||
| backslashRunLength++; | ||
| } | ||
|
|
||
| // Odd number of trailing backslashes means the newline is escaped. | ||
| if (backslashRunLength % 2 === 1) { | ||
| current = current.slice(0, -1); | ||
| i++; | ||
| if (ch === "\r" && i < len && commandText[i] === "\n") { | ||
| i++; | ||
| } | ||
| while (i < len && (commandText[i] === " " || commandText[i] === "\t")) i++; | ||
| if (current && !/\s$/.test(current)) { | ||
| current += " "; | ||
| } | ||
| continue; | ||
| } | ||
|
|
||
| segments.push(current); | ||
| current = ""; | ||
| i++; | ||
| if (ch === "\r" && i < len && commandText[i] === "\n") { | ||
| i++; | ||
| } | ||
| while (i < len && /\s/.test(commandText[i])) i++; | ||
| continue; | ||
| } | ||
|
|
||
| current += ch; | ||
| i++; | ||
| } | ||
|
|
@@ -156,13 +189,26 @@ function splitOnPipelineOperators(commandText) { | |
| } | ||
|
|
||
| /** | ||
| * Shell flow-control keywords that can appear as the first word of a segment | ||
| * but do not represent an executable command. They must be excluded so the | ||
| * permission checker does not attempt to look up keywords like "then" or "fi" | ||
| * as command names and incorrectly deny (or allow) a pipeline that contains | ||
| * them as part of a compound statement (e.g. `if …; then cat …; fi`). | ||
| * Clause keywords can prefix an executable command in the same segment | ||
| * (for example: "then cat file", "do git log"). These are skipped and | ||
| * scanning continues to find the command token. | ||
| */ | ||
| const SHELL_KEYWORDS = new Set(["then", "else", "elif", "fi", "do", "done", "esac", "in", "function", "time", "coproc"]); | ||
| const CLAUSE_KEYWORDS = new Set(["then", "else", "elif", "do"]); | ||
|
|
||
| /** | ||
| * Structural shell keywords never represent an executable command token | ||
| * for permission matching in this parser. They introduce/close control | ||
| * structures and are treated as non-command segment starts. | ||
| */ | ||
| const STRUCTURE_KEYWORDS = new Set(["if", "fi", "for", "done", "while", "until", "case", "esac", "select", "in", "function", "time", "coproc"]); | ||
|
|
||
| const SHELL_KEYWORDS = new Set([...CLAUSE_KEYWORDS, ...STRUCTURE_KEYWORDS]); | ||
|
|
||
| // IDENTIFIER=VALUE where VALUE is one of: | ||
| // - "(...)" double-quoted text (supports escapes like \") | ||
| // - '(...)' single-quoted text | ||
| // - an unquoted non-space token | ||
| const ENV_ASSIGNMENT_PREFIX_RE = /^[A-Za-z_][A-Za-z0-9_]*=(?:"(?:\\.|[^"\\])*"|'[^']*'|\S*)\s*/; | ||
|
|
||
| /** | ||
| * Extract the executable command name from a single shell command segment. | ||
|
|
@@ -186,9 +232,8 @@ function extractCommandName(segment) { | |
| if (!remaining) return null; | ||
|
|
||
| // Skip leading env-var assignments: IDENTIFIER=anything (repeat) | ||
| const envAssignRe = /^[A-Za-z_][A-Za-z0-9_]*=\S*\s*/; | ||
| for (;;) { | ||
| const m = remaining.match(envAssignRe); | ||
| const m = remaining.match(ENV_ASSIGNMENT_PREFIX_RE); | ||
| if (!m) break; | ||
| remaining = remaining.slice(m[0].length).trim(); | ||
| } | ||
|
|
@@ -216,6 +261,11 @@ function extractCommandName(segment) { | |
|
|
||
| // Flow-control keywords are not executable commands | ||
| if (SHELL_KEYWORDS.has(word)) { | ||
| if (CLAUSE_KEYWORDS.has(word)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/diagnose] 💡 ImpactFor a segment like
But
This means workflows using Consider whether |
||
| remaining = remaining.slice(word.length).trim(); | ||
| if (!remaining) return null; | ||
| continue; | ||
| } | ||
| return null; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -105,6 +105,16 @@ describe("splitOnPipelineOperators", () => { | |
| expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]); | ||
| }); | ||
|
|
||
| it("splits on newlines as sequential separators", () => { | ||
| const segments = splitOnPipelineOperators("pwd\nls -la\nsafeoutputs --help"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] Only 💡 Suggested additionsit("splits on CRLF as sequential separator", () => {
const segments = splitOnPipelineOperators("pwd\r\nls -la\r\nsafeoutputs --help");
expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]);
});
it("splits on bare CR as sequential separator", () => {
const segments = splitOnPipelineOperators("pwd\rls -la\rsafeoutputs --help");
expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]);
});Corresponding conformance vectors should also be added to |
||
| expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]); | ||
| }); | ||
|
|
||
|
Comment on lines
+108
to
+112
|
||
| it("does not split on escaped newline continuations", () => { | ||
| const segments = splitOnPipelineOperators("git log \\\n --oneline \\\n --max-count=1"); | ||
| expect(segments).toEqual(["git log --oneline --max-count=1"]); | ||
| }); | ||
|
|
||
| it("trims leading/trailing whitespace from each segment", () => { | ||
| const segments = splitOnPipelineOperators(" ls /tmp && cat file "); | ||
| expect(segments[0]).toBe("ls /tmp"); | ||
|
|
@@ -137,6 +147,18 @@ describe("extractCommandName", () => { | |
| expect(extractCommandName("FOO=bar BAZ=qux echo hi")).toBe("echo"); | ||
| }); | ||
|
|
||
| it("skips leading env-var assignment with quoted spaces", () => { | ||
| expect(extractCommandName("FILES='a b c' echo hi")).toBe("echo"); | ||
| }); | ||
|
|
||
| it("skips leading env-var assignment with double-quoted spaces", () => { | ||
| expect(extractCommandName('FILES="a b c" echo hi')).toBe("echo"); | ||
| }); | ||
|
|
||
| it("skips leading env-var assignment with escaped quote in double-quoted value", () => { | ||
| expect(extractCommandName('FILES="a \\"b\\" c" echo hi')).toBe("echo"); | ||
| }); | ||
|
|
||
| it("handles negation operator ! and returns next command", () => { | ||
| expect(extractCommandName("! ls /tmp")).toBe("ls"); | ||
| }); | ||
|
|
@@ -153,10 +175,30 @@ describe("extractCommandName", () => { | |
| expect(extractCommandName("else")).toBeNull(); | ||
| }); | ||
|
|
||
| it("extracts command after shell keyword 'else'", () => { | ||
| expect(extractCommandName("else cat file")).toBe("cat"); | ||
| }); | ||
|
|
||
| it("returns null for shell keyword 'fi'", () => { | ||
| expect(extractCommandName("fi")).toBeNull(); | ||
| }); | ||
|
|
||
| it("extracts command after shell keyword 'elif'", () => { | ||
| expect(extractCommandName("elif grep x file")).toBe("grep"); | ||
| }); | ||
|
|
||
| it("returns null for shell keyword 'if'", () => { | ||
| expect(extractCommandName("if [ -f file ]")).toBeNull(); | ||
| }); | ||
|
|
||
| it("returns null for shell keyword 'for'", () => { | ||
| expect(extractCommandName("for f in a b c")).toBeNull(); | ||
| }); | ||
|
|
||
| it("extracts command after shell keyword 'do'", () => { | ||
| expect(extractCommandName("do git status")).toBe("git"); | ||
| }); | ||
|
|
||
| it("returns null for a bare redirection like >file", () => { | ||
| expect(extractCommandName(">file.txt")).toBeNull(); | ||
| }); | ||
|
|
@@ -280,6 +322,26 @@ describe("extractCommandNamesFromPipeline", () => { | |
| it("handles date with flags", () => { | ||
| expect(extractCommandNamesFromPipeline("date +%Y-%m-%d && echo done")).toEqual(["date", "echo"]); | ||
| }); | ||
|
|
||
| it("extracts all command names from multiline script with variables and control flow", () => { | ||
| const cmd = `set -euo pipefail | ||
| CACHE_DIR='cache/gh-aw/cache-memory/compiler-quality' | ||
| ANALYSES_DIR="$CACHE_DIR/analyses" | ||
| mkdir -p "$ANALYSES_DIR" | ||
| FILES='compiler.go compiler_activation_jobs.go compiler_orchestrator.go compiler_jobs.go compiler_safe_outputs.go compiler_safe_outputs_config.go compiler_safe_outputs_job.go compiler_yaml.go compiler_yaml_main_job.go' | ||
| for f in $FILES; do git -C /home/runner/work/gh-aw/gh-aw log -1 --format='%H' -- "pkg/workflow/$f" | sed "s|^|$f |"; done | ||
| printf '---ROTATION---\n' | ||
| if [ -f "$CACHE_DIR/rotation.json" ]; then cat "$CACHE_DIR/rotation.json"; fi | ||
| printf '\n---HASHES---\n' | ||
| if [ -f "$CACHE_DIR/file-hashes.json" ]; then cat "$CACHE_DIR/file-hashes.json"; fi | ||
| printf '\n---FILES---\n' | ||
| for f in $FILES; do wc -l "/home/runner/work/gh-aw/gh-aw/pkg/workflow/$f"; done`; | ||
| expect(extractCommandNamesFromPipeline(cmd)).toEqual(["set", "mkdir", "git", "sed", "printf", "cat", "wc"]); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The test asserts that Details💡 SuggestionAdd a comment in the test (or a dedicated // 'if' is a structural keyword — the entire condition segment returns null,
// so '[' is intentionally NOT extracted as a required command.
// Scripts with if-conditions do NOT need shell([) permission.
it("does not extract [ from if-condition segments", () => {
expect(extractCommandName("if [ -f file ]")).toBeNull();
});This documents the design choice and acts as a guard against accidentally promoting |
||
| }); | ||
|
|
||
| it("keeps continued multiline command as one extracted command", () => { | ||
| expect(extractCommandNamesFromPipeline("git log \\\n --oneline \\\n --max-count=1")).toEqual(["git"]); | ||
| }); | ||
| }); | ||
|
|
||
| // ───────────────────────────────────────────────────────────────────────────── | ||
|
|
@@ -351,7 +413,7 @@ describe("extractCommandName – extensive vectors", () => { | |
| { id: "BP-EC-004", segment: "2>&1", expected: null }, | ||
| { id: "BP-EC-005", segment: ">out.txt", expected: null }, | ||
| { id: "BP-EC-006", segment: "A=1 B=2 safeoutputs missing_data", expected: "safeoutputs" }, | ||
| { id: "BP-EC-007", segment: "then cat file", expected: null }, | ||
| { id: "BP-EC-007", segment: "then cat file", expected: "cat" }, | ||
| { id: "BP-EC-008", segment: "fi", expected: null }, | ||
| { id: "BP-EC-009", segment: "do", expected: null }, | ||
| { id: "BP-EC-010", segment: "done", expected: null }, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| { | ||
| "version": "1.0.0", | ||
| "version": "1.1.0", | ||
| "metadata": { | ||
| "spec": "docs/src/content/docs/specs/bash-command-parser-specification.md", | ||
| "description": "Language-agnostic conformance vectors for the bash command parser", | ||
|
|
@@ -54,6 +54,18 @@ | |
| "source": "verification", | ||
| "input": " ! ls /tmp && echo done ", | ||
| "expected": ["! ls /tmp", "echo done"] | ||
| }, | ||
| { | ||
| "id": "BP-SP-153", | ||
| "source": "verification", | ||
| "input": "pwd\nls -la\nsafeoutputs --help", | ||
| "expected": ["pwd", "ls -la", "safeoutputs --help"] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [/tdd] The spec and implementation support 💡 Suggested vectors{
"id": "BP-SP-154",
"source": "verification",
"input": "pwd\r\nls -la\r\nsafeoutputs --help",
"expected": ["pwd", "ls -la", "safeoutputs --help"]
},
{
"id": "BP-SP-155",
"source": "verification",
"input": "pwd\rls -la\rsafeoutputs --help",
"expected": ["pwd", "ls -la", "safeoutputs --help"]
} |
||
| }, | ||
| { | ||
| "id": "BP-SP-154", | ||
| "source": "verification", | ||
| "input": "git log \\\n --oneline \\\n --max-count=1", | ||
| "expected": ["git log --oneline --max-count=1"] | ||
| } | ||
| ], | ||
| "extractCommandName": [ | ||
|
|
@@ -85,7 +97,7 @@ | |
| "id": "BP-EC-105", | ||
| "source": "model-based", | ||
| "input": "then cat file", | ||
| "expected": null | ||
| "expected": "cat" | ||
| }, | ||
| { | ||
| "id": "BP-EC-151", | ||
|
|
@@ -104,6 +116,18 @@ | |
| "source": "verification", | ||
| "input": "coproc", | ||
| "expected": null | ||
| }, | ||
| { | ||
| "id": "BP-EC-154", | ||
| "source": "verification", | ||
| "input": "FILES='a b c' echo hi", | ||
| "expected": "echo" | ||
| }, | ||
| { | ||
| "id": "BP-EC-155", | ||
| "source": "verification", | ||
| "input": "FILES=\"a \\\"b\\\" c\" echo hi", | ||
| "expected": "echo" | ||
| } | ||
| ], | ||
| "extractCommandNamesFromPipeline": [ | ||
|
|
@@ -148,6 +172,19 @@ | |
| "source": "verification", | ||
| "input": "cat $(ls /tmp)", | ||
| "expected": ["cat"] | ||
| }, | ||
| { | ||
| "id": "BP-EP-154", | ||
| "source": "verification", | ||
| "note": "Multiline control-flow script fixture to verify extraction of set/mkdir/git/sed/printf/cat/wc across for/if blocks.", | ||
| "input": "set -euo pipefail\nmkdir -p \"$ANALYSES_DIR\"\nfor f in $FILES; do git -C \"$REPO\" log -1 -- \"$f\" | sed \"s|^|$f |\"; done\nprintf '---ROTATION---\\n'\nif [ -f \"$CACHE_DIR/rotation.json\" ]; then cat \"$CACHE_DIR/rotation.json\"; fi\nfor f in $FILES; do wc -l \"$f\"; done", | ||
| "expected": ["set", "mkdir", "git", "sed", "printf", "cat", "wc"] | ||
| }, | ||
| { | ||
| "id": "BP-EP-155", | ||
| "source": "verification", | ||
| "input": "git log \\\n --oneline \\\n --max-count=1", | ||
| "expected": ["git"] | ||
| } | ||
| ] | ||
| }, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[/diagnose] Shell comment lines (
# ...) are now split into their own segments by the newline handler, andextractCommandNamewill return"#"for them — causing any script with comment lines to be rejected becauseshell(#)is never in any allowlist.💡 Root cause & fix
word = "#"passes every existing guard (not a redirection, not!/{/}, not inSHELL_KEYWORDS) so the function returns it as a command name.Add an early exit in
extractCommandName(before the keyword check):Alternatively strip trailing comments in
splitOnPipelineOperatorsbefore pushing a segment. TheextractCommandNameapproach is narrower and easier to test.A regression test should be added for the pipeline
"git status\n# a comment\necho done"expecting["git", "echo"].