github · pelikhan · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/.github/workflows/daily-compiler-quality.lock.yml b/.github/workflows/daily-compiler-quality.lock.yml
diff --git a/.github/workflows/daily-compiler-quality.md b/.github/workflows/daily-compiler-quality.md
@@ -62,7 +62,28 @@ name: Daily Compiler Quality Check
 strict: true
 timeout-minutes: 30
 tools:
-  bash: true
+  bash:
+  - set
+  - find
+  - wc
+  - git
+  - mkdir
+  - cat
+  - jq
+  - mv
+  - echo
+  - bc
+  - sed
+  - printf
+  - date
+  - grep
+  - head
+  - ls
+  - pwd
+  - sort
+  - tail
+  - uniq
+  - yq
   cache-memory: true
   cli-proxy: true
   github:

diff --git a/actions/setup/js/bash_command_parser.cjs b/actions/setup/js/bash_command_parser.cjs
@@ -26,7 +26,7 @@
 
 /**
  * Split a shell command text into individual pipeline segments.
- * Splits on the following shell operators: &&, ||, |, ;
+ * Splits on the following shell operators: &&, ||, |, ; and newlines.
  *
  * The split respects:
  *   - Single-quoted strings (no escaping inside)
@@ -143,6 +143,39 @@ function splitOnPipelineOperators(commandText) {
       continue;
     }
 
+    // Newline (sequential) — treat line breaks as command separators,
+    // except when escaped as a shell line continuation ("\\" + newline).
+    // Handles LF, CRLF, and CR forms.
+    if (ch === "\n" || ch === "\r") {
+      let backslashRunLength = 0;
+      for (let j = current.length - 1; j >= 0 && current[j] === "\\"; j--) {
+        backslashRunLength++;
+      }
+
+      // Odd number of trailing backslashes means the newline is escaped.
+      if (backslashRunLength % 2 === 1) {
+        current = current.slice(0, -1);
+        i++;
+        if (ch === "\r" && i < len && commandText[i] === "\n") {
+          i++;
+        }
+        while (i < len && (commandText[i] === " " || commandText[i] === "\t")) i++;
+        if (current && !/\s$/.test(current)) {
+          current += " ";
+        }
+        continue;
+      }
+
+      segments.push(current);
+      current = "";
+      i++;
+      if (ch === "\r" && i < len && commandText[i] === "\n") {
+        i++;
+      }
+      while (i < len && /\s/.test(commandText[i])) i++;
+      continue;
+    }
+
     current += ch;
     i++;
   }
@@ -156,13 +189,26 @@ function splitOnPipelineOperators(commandText) {
 }
 
 /**
- * Shell flow-control keywords that can appear as the first word of a segment
- * but do not represent an executable command.  They must be excluded so the
- * permission checker does not attempt to look up keywords like "then" or "fi"
- * as command names and incorrectly deny (or allow) a pipeline that contains
- * them as part of a compound statement (e.g. `if …; then cat …; fi`).
+ * Clause keywords can prefix an executable command in the same segment
+ * (for example: "then cat file", "do git log"). These are skipped and
+ * scanning continues to find the command token.
  */
-const SHELL_KEYWORDS = new Set(["then", "else", "elif", "fi", "do", "done", "esac", "in", "function", "time", "coproc"]);
+const CLAUSE_KEYWORDS = new Set(["then", "else", "elif", "do"]);
+
+/**
+ * Structural shell keywords never represent an executable command token
+ * for permission matching in this parser. They introduce/close control
+ * structures and are treated as non-command segment starts.
+ */
+const STRUCTURE_KEYWORDS = new Set(["if", "fi", "for", "done", "while", "until", "case", "esac", "select", "in", "function", "time", "coproc"]);
+
+const SHELL_KEYWORDS = new Set([...CLAUSE_KEYWORDS, ...STRUCTURE_KEYWORDS]);
+
+// IDENTIFIER=VALUE where VALUE is one of:
+// - "(...)" double-quoted text (supports escapes like \")
+// - '(...)' single-quoted text
+// - an unquoted non-space token
+const ENV_ASSIGNMENT_PREFIX_RE = /^[A-Za-z_][A-Za-z0-9_]*=(?:"(?:\\.|[^"\\])*"|'[^']*'|\S*)\s*/;
 
 /**
  * Extract the executable command name from a single shell command segment.
@@ -186,9 +232,8 @@ function extractCommandName(segment) {
   if (!remaining) return null;
 
   // Skip leading env-var assignments: IDENTIFIER=anything  (repeat)
-  const envAssignRe = /^[A-Za-z_][A-Za-z0-9_]*=\S*\s*/;
   for (;;) {
-    const m = remaining.match(envAssignRe);
+    const m = remaining.match(ENV_ASSIGNMENT_PREFIX_RE);
     if (!m) break;
     remaining = remaining.slice(m[0].length).trim();
   }
@@ -216,6 +261,11 @@ function extractCommandName(segment) {
 
     // Flow-control keywords are not executable commands
     if (SHELL_KEYWORDS.has(word)) {
+      if (CLAUSE_KEYWORDS.has(word)) {
+        remaining = remaining.slice(word.length).trim();
+        if (!remaining) return null;
+        continue;
+      }
       return null;
     }
 

diff --git a/actions/setup/js/bash_command_parser.test.cjs b/actions/setup/js/bash_command_parser.test.cjs
@@ -105,6 +105,16 @@ describe("splitOnPipelineOperators", () => {
     expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]);
   });
 
+  it("splits on newlines as sequential separators", () => {
+    const segments = splitOnPipelineOperators("pwd\nls -la\nsafeoutputs --help");
+    expect(segments).toEqual(["pwd", "ls -la", "safeoutputs --help"]);
+  });
+
+  it("does not split on escaped newline continuations", () => {
+    const segments = splitOnPipelineOperators("git log \\\n  --oneline \\\n  --max-count=1");
+    expect(segments).toEqual(["git log --oneline --max-count=1"]);
+  });
+
   it("trims leading/trailing whitespace from each segment", () => {
     const segments = splitOnPipelineOperators("  ls /tmp  &&  cat file  ");
     expect(segments[0]).toBe("ls /tmp");
@@ -137,6 +147,18 @@ describe("extractCommandName", () => {
     expect(extractCommandName("FOO=bar BAZ=qux echo hi")).toBe("echo");
   });
 
+  it("skips leading env-var assignment with quoted spaces", () => {
+    expect(extractCommandName("FILES='a b c' echo hi")).toBe("echo");
+  });
+
+  it("skips leading env-var assignment with double-quoted spaces", () => {
+    expect(extractCommandName('FILES="a b c" echo hi')).toBe("echo");
+  });
+
+  it("skips leading env-var assignment with escaped quote in double-quoted value", () => {
+    expect(extractCommandName('FILES="a \\"b\\" c" echo hi')).toBe("echo");
+  });
+
   it("handles negation operator ! and returns next command", () => {
     expect(extractCommandName("! ls /tmp")).toBe("ls");
   });
@@ -153,10 +175,30 @@ describe("extractCommandName", () => {
     expect(extractCommandName("else")).toBeNull();
   });
 
+  it("extracts command after shell keyword 'else'", () => {
+    expect(extractCommandName("else cat file")).toBe("cat");
+  });
+
   it("returns null for shell keyword 'fi'", () => {
     expect(extractCommandName("fi")).toBeNull();
   });
 
+  it("extracts command after shell keyword 'elif'", () => {
+    expect(extractCommandName("elif grep x file")).toBe("grep");
+  });
+
+  it("returns null for shell keyword 'if'", () => {
+    expect(extractCommandName("if [ -f file ]")).toBeNull();
+  });
+
+  it("returns null for shell keyword 'for'", () => {
+    expect(extractCommandName("for f in a b c")).toBeNull();
+  });
+
+  it("extracts command after shell keyword 'do'", () => {
+    expect(extractCommandName("do git status")).toBe("git");
+  });
+
   it("returns null for a bare redirection like >file", () => {
     expect(extractCommandName(">file.txt")).toBeNull();
   });
@@ -280,6 +322,26 @@ describe("extractCommandNamesFromPipeline", () => {
   it("handles date with flags", () => {
     expect(extractCommandNamesFromPipeline("date +%Y-%m-%d && echo done")).toEqual(["date", "echo"]);
   });
+
+  it("extracts all command names from multiline script with variables and control flow", () => {
+    const cmd = `set -euo pipefail
+CACHE_DIR='cache/gh-aw/cache-memory/compiler-quality'
+ANALYSES_DIR="$CACHE_DIR/analyses"
+mkdir -p "$ANALYSES_DIR"
+FILES='compiler.go compiler_activation_jobs.go compiler_orchestrator.go compiler_jobs.go compiler_safe_outputs.go compiler_safe_outputs_config.go compiler_safe_outputs_job.go compiler_yaml.go compiler_yaml_main_job.go'
+for f in $FILES; do git -C /home/runner/work/gh-aw/gh-aw log -1 --format='%H' -- "pkg/workflow/$f" | sed "s|^|$f |"; done
+printf '---ROTATION---\n'
+if [ -f "$CACHE_DIR/rotation.json" ]; then cat "$CACHE_DIR/rotation.json"; fi
+printf '\n---HASHES---\n'
+if [ -f "$CACHE_DIR/file-hashes.json" ]; then cat "$CACHE_DIR/file-hashes.json"; fi
+printf '\n---FILES---\n'
+for f in $FILES; do wc -l "/home/runner/work/gh-aw/gh-aw/pkg/workflow/$f"; done`;
+    expect(extractCommandNamesFromPipeline(cmd)).toEqual(["set", "mkdir", "git", "sed", "printf", "cat", "wc"]);
+  });
+
+  it("keeps continued multiline command as one extracted command", () => {
+    expect(extractCommandNamesFromPipeline("git log \\\n  --oneline \\\n  --max-count=1")).toEqual(["git"]);
+  });
 });
 
 // ─────────────────────────────────────────────────────────────────────────────
@@ -351,7 +413,7 @@ describe("extractCommandName – extensive vectors", () => {
     { id: "BP-EC-004", segment: "2>&1", expected: null },
     { id: "BP-EC-005", segment: ">out.txt", expected: null },
     { id: "BP-EC-006", segment: "A=1 B=2 safeoutputs missing_data", expected: "safeoutputs" },
-    { id: "BP-EC-007", segment: "then cat file", expected: null },
+    { id: "BP-EC-007", segment: "then cat file", expected: "cat" },
     { id: "BP-EC-008", segment: "fi", expected: null },
     { id: "BP-EC-009", segment: "do", expected: null },
     { id: "BP-EC-010", segment: "done", expected: null },

diff --git a/actions/setup/js/bash_command_parser_spec_vectors.json b/actions/setup/js/bash_command_parser_spec_vectors.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.0.0",
+  "version": "1.1.0",
   "metadata": {
     "spec": "docs/src/content/docs/specs/bash-command-parser-specification.md",
     "description": "Language-agnostic conformance vectors for the bash command parser",
@@ -54,6 +54,18 @@
         "source": "verification",
         "input": " ! ls /tmp &&  echo done ",
         "expected": ["! ls /tmp", "echo done"]
+      },
+      {
+        "id": "BP-SP-153",
+        "source": "verification",
+        "input": "pwd\nls -la\nsafeoutputs --help",
+        "expected": ["pwd", "ls -la", "safeoutputs --help"]
+      },
+      {
+        "id": "BP-SP-154",
+        "source": "verification",
+        "input": "git log \\\n  --oneline \\\n  --max-count=1",
+        "expected": ["git log --oneline --max-count=1"]
       }
     ],
     "extractCommandName": [
@@ -85,7 +97,7 @@
         "id": "BP-EC-105",
         "source": "model-based",
         "input": "then cat file",
-        "expected": null
+        "expected": "cat"
       },
       {
         "id": "BP-EC-151",
@@ -104,6 +116,18 @@
         "source": "verification",
         "input": "coproc",
         "expected": null
+      },
+      {
+        "id": "BP-EC-154",
+        "source": "verification",
+        "input": "FILES='a b c' echo hi",
+        "expected": "echo"
+      },
+      {
+        "id": "BP-EC-155",
+        "source": "verification",
+        "input": "FILES=\"a \\\"b\\\" c\" echo hi",
+        "expected": "echo"
       }
     ],
     "extractCommandNamesFromPipeline": [
@@ -148,6 +172,19 @@
         "source": "verification",
         "input": "cat $(ls /tmp)",
         "expected": ["cat"]
+      },
+      {
+        "id": "BP-EP-154",
+        "source": "verification",
+        "note": "Multiline control-flow script fixture to verify extraction of set/mkdir/git/sed/printf/cat/wc across for/if blocks.",
+        "input": "set -euo pipefail\nmkdir -p \"$ANALYSES_DIR\"\nfor f in $FILES; do git -C \"$REPO\" log -1 -- \"$f\" | sed \"s|^|$f |\"; done\nprintf '---ROTATION---\\n'\nif [ -f \"$CACHE_DIR/rotation.json\" ]; then cat \"$CACHE_DIR/rotation.json\"; fi\nfor f in $FILES; do wc -l \"$f\"; done",
+        "expected": ["set", "mkdir", "git", "sed", "printf", "cat", "wc"]
+      },
+      {
+        "id": "BP-EP-155",
+        "source": "verification",
+        "input": "git log \\\n  --oneline \\\n  --max-count=1",
+        "expected": ["git"]
       }
     ]
   },