diff --git a/actions/setup/js/claude_harness.cjs b/actions/setup/js/claude_harness.cjs index b967ded81a1..1cc6752790c 100644 --- a/actions/setup/js/claude_harness.cjs +++ b/actions/setup/js/claude_harness.cjs @@ -48,6 +48,7 @@ const { const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs"); const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs"); const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs"); +const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs"); // Maximum number of retry attempts after the initial run const MAX_RETRIES = 3; @@ -148,6 +149,15 @@ function isNoDeferredMarkerError(output) { return NO_DEFERRED_MARKER_PATTERN.test(output); } +/** + * Determines if the collected output indicates an invalid or unavailable model name. + * @param {string} output - Collected stdout+stderr from the process + * @returns {boolean} + */ +function isInvalidModelError(output) { + return INVALID_MODEL_ERROR_PATTERN.test(output); +} + /** * Determines whether the exit code corresponds to signal-style termination * (SIGKILL=137 / SIGTERM=143), typically from timeout/cancellation. @@ -372,6 +382,7 @@ async function main() { const isAuthenticationFailed = isAuthenticationFailedError(result.output); const isMaxTurns = isMaxTurnsExit(result.output); const isNoDeferredMarker = isNoDeferredMarkerError(result.output); + const isInvalidModel = isInvalidModelError(result.output); const permissionDeniedCount = countPermissionDeniedIssues(result.output); const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output); log( @@ -382,6 +393,7 @@ async function main() { ` isAuthenticationFailedError=${isAuthenticationFailed}` + ` isMaxTurnsExit=${isMaxTurns}` + ` isNoDeferredMarkerError=${isNoDeferredMarker}` + + ` isInvalidModelError=${isInvalidModel}` + ` permissionDeniedCount=${permissionDeniedCount}` + ` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` + ` hasOutput=${result.hasOutput}` + @@ -411,6 +423,11 @@ async function main() { break; } + if (isInvalidModel) { + log(`attempt ${attempt + 1}: invalid/unsupported model configuration — not retrying (specify a valid engine model name in workflow frontmatter)`); + break; + } + if (hasNumerousPermissionDenied) { const deniedCommands = extractDeniedCommands(result.output); emitMissingToolPermissionIssue({ deniedCommands, logger: log }); @@ -494,6 +511,7 @@ if (typeof module !== "undefined" && module.exports) { isAuthenticationFailedError, isMaxTurnsExit, isNoDeferredMarkerError, + isInvalidModelError, isSignalTerminationExitCode, shouldRetryWithContinue, countPermissionDeniedIssues, diff --git a/actions/setup/js/claude_harness.test.cjs b/actions/setup/js/claude_harness.test.cjs index 3f03a70167c..4079843aab1 100644 --- a/actions/setup/js/claude_harness.test.cjs +++ b/actions/setup/js/claude_harness.test.cjs @@ -13,6 +13,7 @@ const { isAuthenticationFailedError, isMaxTurnsExit, isNoDeferredMarkerError, + isInvalidModelError, isSignalTerminationExitCode, shouldRetryWithContinue, countPermissionDeniedIssues, @@ -187,6 +188,27 @@ describe("claude_harness.cjs", () => { expect(isAuthenticationFailedError("Authentication failed (Request ID: C818:3ED713:19D401B:1C446B7:69D653CA)")).toBe(true); }); + describe("isInvalidModelError", () => { + it("returns true for model-not-supported errors", () => { + expect(isInvalidModelError("Execution failed: CAPIError: 400 The requested model is not supported.")).toBe(true); + }); + + it("returns true for invalid model name errors", () => { + expect(isInvalidModelError("invalid model name 'claude-sonnet-999'")).toBe(true); + expect(isInvalidModelError("model 'claude-ultra' does not exist")).toBe(true); + expect(isInvalidModelError("model claude-fake is not supported")).toBe(true); + expect(isInvalidModelError("model gemini-v99 is unavailable")).toBe(true); + expect(isInvalidModelError("model 'claude-3-5-sonnet@20241022' not found")).toBe(true); + }); + + it("returns false for unrelated errors", () => { + expect(isInvalidModelError("rate_limit_error")).toBe(false); + expect(isInvalidModelError("Error: invalid model response format")).toBe(false); + expect(isInvalidModelError('{"type":"result","subtype":"error_max_turns","is_error":true}')).toBe(false); + expect(isInvalidModelError("")).toBe(false); + }); + }); + it("returns false for unrelated output", () => { expect(isAuthenticationFailedError("No authentication information found")).toBe(false); expect(isAuthenticationFailedError("rate_limit_error")).toBe(false); diff --git a/actions/setup/js/codex_harness.cjs b/actions/setup/js/codex_harness.cjs index b04855d865d..a88d4026525 100644 --- a/actions/setup/js/codex_harness.cjs +++ b/actions/setup/js/codex_harness.cjs @@ -47,6 +47,7 @@ const { const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs"); const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs"); const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs"); +const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs"); // Maximum number of retry attempts after the initial run const MAX_RETRIES = 3; @@ -120,6 +121,15 @@ function isServerError(output) { return SERVER_ERROR_PATTERN.test(output); } +/** + * Determines if the collected output indicates an invalid or unavailable model name. + * @param {string} output - Collected stdout+stderr from the process + * @returns {boolean} + */ +function isInvalidModelError(output) { + return INVALID_MODEL_ERROR_PATTERN.test(output); +} + /** * Resolve --prompt-file arguments for the Codex run. * Strips the --prompt-file pair from args and appends the file content @@ -402,6 +412,7 @@ async function main() { const isAuthenticationFailed = isAuthenticationFailedError(result.output); const isMissingApiKey = isMissingApiKeyError(result.output); const isServer = isServerError(result.output); + const isInvalidModel = isInvalidModelError(result.output); const permissionDeniedCount = countPermissionDeniedIssues(result.output); const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output); log( @@ -411,6 +422,7 @@ async function main() { ` isAuthenticationFailedError=${isAuthenticationFailed}` + ` isMissingApiKeyError=${isMissingApiKey}` + ` isServerError=${isServer}` + + ` isInvalidModelError=${isInvalidModel}` + ` permissionDeniedCount=${permissionDeniedCount}` + ` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` + ` hasOutput=${result.hasOutput}` + @@ -445,6 +457,11 @@ async function main() { break; } + if (isInvalidModel) { + log(`attempt ${attempt + 1}: invalid/unsupported model configuration — not retrying (specify a valid engine model name in workflow frontmatter)`); + break; + } + if (hasNumerousPermissionDenied) { const deniedCommands = extractDeniedCommands(result.output); emitMissingToolPermissionIssue({ deniedCommands, logger: log }); @@ -485,6 +502,7 @@ if (typeof module !== "undefined" && module.exports) { isAuthenticationFailedError, isMissingApiKeyError, isServerError, + isInvalidModelError, countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, diff --git a/actions/setup/js/codex_harness.test.cjs b/actions/setup/js/codex_harness.test.cjs index ccd7793fd0e..1b01eb0e7b4 100644 --- a/actions/setup/js/codex_harness.test.cjs +++ b/actions/setup/js/codex_harness.test.cjs @@ -13,6 +13,7 @@ const { isAuthenticationFailedError, isMissingApiKeyError, isServerError, + isInvalidModelError, countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, @@ -272,6 +273,26 @@ env_key = "OPENAI_API_KEY" expect(isServerError("InternalServerError: The server had an error processing your request")).toBe(true); }); + describe("isInvalidModelError", () => { + it("returns true for model-not-supported errors", () => { + expect(isInvalidModelError("Execution failed: CAPIError: 400 The requested model is not supported.")).toBe(true); + }); + + it("returns true for invalid model name errors", () => { + expect(isInvalidModelError("invalid model name 'claude-sonnet-999'")).toBe(true); + expect(isInvalidModelError("model 'gpt-foo' not found")).toBe(true); + expect(isInvalidModelError("model gpt-unknown is not available")).toBe(true); + expect(isInvalidModelError("model 'claude-3-5-sonnet@20241022' not found")).toBe(true); + }); + + it("returns false for unrelated errors", () => { + expect(isInvalidModelError("rate_limit_exceeded")).toBe(false); + expect(isInvalidModelError("unknown model behavior detected")).toBe(false); + expect(isInvalidModelError("ServiceUnavailableError")).toBe(false); + expect(isInvalidModelError("")).toBe(false); + }); + }); + it("returns true for ServiceUnavailableError", () => { expect(isServerError("ServiceUnavailableError: The server is temporarily unable to service your request")).toBe(true); }); diff --git a/actions/setup/js/detect_agent_errors.cjs b/actions/setup/js/detect_agent_errors.cjs index 074a5817626..d6162282ef2 100644 --- a/actions/setup/js/detect_agent_errors.cjs +++ b/actions/setup/js/detect_agent_errors.cjs @@ -1,7 +1,7 @@ // @ts-check /** - * Detect Copilot CLI errors in the agent stdio log. + * Detect agent engine errors in the agent stdio log. * * Scans the agent stdio log for known error patterns and sets GitHub Actions * output variables for each detected error class: @@ -13,8 +13,9 @@ * - agentic_engine_timeout: The agentic engine process was killed by a * signal (SIGTERM/SIGKILL/SIGINT), typically due to the step * timeout-minutes limit being reached. - * - model_not_supported_error: The requested model is not supported for - * the user's Copilot subscription tier (e.g., Copilot Pro/Education). + * - model_not_supported_error: The configured model is invalid or unsupported + * for the selected engine/account (for example unknown model name, model not + * found, or model unavailable for the plan). * * This replaces the individual bash scripts (detect_inference_access_error.sh, * detect_mcp_policy_error.sh) with a single JavaScript step. @@ -44,11 +45,15 @@ const MCP_POLICY_BLOCKED_PATTERN = /MCP servers were blocked by policy:/; // making it engine-agnostic. const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/; -// Pattern: Requested model is not supported for the user's subscription tier. -// This occurs when Copilot Pro/Education users attempt to use a model that is -// not available for their plan. The full error from the Copilot CLI is: -// Execution failed: CAPIError: 400 The requested model is not supported. -const MODEL_NOT_SUPPORTED_PATTERN = /The requested model is not supported/; +// Pattern: Configured model is invalid or unavailable. +// Covers common engine/provider variants: +// - "The requested model is not supported" +// - "invalid model name '...'" +// - "unknown model " +// - "model ... not found" +// - "model ... does not exist" +const MODEL_NOT_SUPPORTED_PATTERN = + /(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i; /** * Detect known error patterns in a log string and return detection results. @@ -105,12 +110,14 @@ function main() { process.stderr.write("[detect-agent-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n"); } if (results.modelNotSupportedError) { - process.stderr.write("[detect-agent-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n"); + process.stderr.write("[detect-agent-errors] Detected model configuration error: configured model is invalid or unavailable for this engine/account\n"); } writeOutputs(results); } -main(); +if (require.main === module) { + main(); +} module.exports = { detectErrors, INFERENCE_ACCESS_ERROR_PATTERN, MCP_POLICY_BLOCKED_PATTERN, AGENTIC_ENGINE_TIMEOUT_PATTERN, MODEL_NOT_SUPPORTED_PATTERN }; diff --git a/actions/setup/js/detect_agent_errors.test.cjs b/actions/setup/js/detect_agent_errors.test.cjs index e592b77fa30..261d44dbe90 100644 --- a/actions/setup/js/detect_agent_errors.test.cjs +++ b/actions/setup/js/detect_agent_errors.test.cjs @@ -97,6 +97,23 @@ describe("detect_agent_errors.cjs", () => { expect(MODEL_NOT_SUPPORTED_PATTERN.test(log)).toBe(true); }); + it("matches invalid/unknown model name variants", () => { + expect(MODEL_NOT_SUPPORTED_PATTERN.test("invalid model name 'claude-sonnet-999'")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("unknown model gpt-unknown")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("model 'gpt-foo' not found")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("model 'claude-ultra' does not exist")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("model claude-fake is not supported")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("model gpt-unknown is not available")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("model gemini-v99 is unavailable")).toBe(true); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("model 'claude-3-5-sonnet@20241022' not found")).toBe(true); + }); + + it("does not match unrelated invalid/unknown model wording", () => { + expect(MODEL_NOT_SUPPORTED_PATTERN.test("Error: invalid model response format")).toBe(false); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("Error: invalid model schema definition")).toBe(false); + expect(MODEL_NOT_SUPPORTED_PATTERN.test("unknown model behavior detected")).toBe(false); + }); + it("does not match other CAPIError 400 errors", () => { expect(MODEL_NOT_SUPPORTED_PATTERN.test("CAPIError: 400 Bad Request")).toBe(false); expect(MODEL_NOT_SUPPORTED_PATTERN.test("CAPIError: 400 400 Bad Request")).toBe(false); @@ -150,6 +167,14 @@ describe("detect_agent_errors.cjs", () => { expect(result.modelNotSupportedError).toBe(true); }); + it("detects invalid model name errors", () => { + const result = detectErrors("Error: invalid model name 'claude-sonnet-999'"); + expect(result.inferenceAccessError).toBe(false); + expect(result.mcpPolicyError).toBe(false); + expect(result.agenticEngineTimeout).toBe(false); + expect(result.modelNotSupportedError).toBe(true); + }); + it("detects both errors in the same log", () => { const log = "Access denied by policy settings\nMCP servers were blocked by policy: 'github'"; const result = detectErrors(log); diff --git a/actions/setup/md/model_not_supported_error.md b/actions/setup/md/model_not_supported_error.md index 44a7d138289..e54070a223b 100644 --- a/actions/setup/md/model_not_supported_error.md +++ b/actions/setup/md/model_not_supported_error.md @@ -1,12 +1,12 @@ > [!WARNING] -> **Model Not Supported**: The Copilot CLI failed because the requested model is not available for your subscription tier. This typically affects Copilot Pro and Education users. +> **Invalid or Unsupported Model**: The agent failed because the configured model name is invalid, unknown, or unavailable for this engine/account. This is a **configuration issue**, not a transient error — retrying will not help.
How to fix this -Specify a model that is supported by your subscription in the workflow frontmatter: +Specify a valid model for the selected engine in the workflow frontmatter: ```yaml --- @@ -15,6 +15,6 @@ model: gpt-5-mini --- ``` -To find the models available for your account, check your [Copilot settings](https://github.com/settings/copilot) or refer to the [supported models documentation](https://docs.github.com/en/copilot/using-github-copilot/using-github-copilot-in-the-command-line#supported-models). +To find valid models, check your engine/provider documentation (for Copilot see [supported models](https://docs.github.com/en/copilot/using-github-copilot/using-github-copilot-in-the-command-line#supported-models)).
diff --git a/docs/adr/38258-engine-agnostic-agent-error-detection.md b/docs/adr/38258-engine-agnostic-agent-error-detection.md new file mode 100644 index 00000000000..b36f780dd7a --- /dev/null +++ b/docs/adr/38258-engine-agnostic-agent-error-detection.md @@ -0,0 +1,45 @@ +# ADR-38258: Engine-Agnostic Agent Error Detection via Capability Method + +**Date**: 2026-06-10 +**Status**: Draft + +## Context + +Agentic workflows run on one of several engines (Copilot, Codex, Claude, Gemini). When a workflow specifies a model name that the selected engine cannot serve, the run fails. Previously, host-side post-run error classification (the `detect-agent-errors` step) and the specialized "model not supported" conclusion messaging were wired only for the Copilot engine via a concrete `engine.(*CopilotEngine)` type assertion, and the model-error pattern matched only the single Copilot subscription-tier string `The requested model is not supported`. As a result, invalid or unknown model names on Codex and Claude — and alternative error phrasings such as "unknown model" or "model … does not exist" — fell through to generic failures and triggered wasteful harness retries on a deterministic, non-recoverable misconfiguration. + +## Decision + +We will detect invalid/unknown model configuration in an engine-agnostic way and route it into the existing specialized conclusion-failure path. Concretely: + +1. Introduce a `GetErrorDetectionScriptId()` capability method on engines; engines that support host-side error detection return `detect_agent_errors`, and the conclusion-job wiring in `notify_comment.go` gates on `engine.GetErrorDetectionScriptId() != ""` instead of asserting the concrete `*CopilotEngine` type. +2. Broaden the shared `MODEL_NOT_SUPPORTED_PATTERN` in `detect_agent_errors.cjs` (and matching patterns in `codex_harness.cjs` and `claude_harness.cjs`) to cover invalid/unknown model-name variants, while keeping the stable `model_not_supported_error=true` output contract. +3. Classify invalid-model errors as non-retryable in the Codex and Claude harnesses so deterministic model-name failures break out of the retry loop immediately. + +This favors a capability-based interface over per-engine type checks so additional engines can opt in without touching the conclusion-job branching logic. + +## Alternatives Considered + +### Alternative 1: Keep the Copilot-only type assertion and add per-engine branches +Extend the existing `if _, ok := engine.(*CopilotEngine); ok` check to also test for `*CodexEngine` and `*ClaudeEngine`. Rejected because it hardcodes the engine list into the conclusion-job builder, must be edited for every new engine, and couples generic wiring to concrete engine types rather than a declared capability. + +### Alternative 2: Detect model errors only inside each harness, no shared host-side step +Handle invalid-model classification entirely within `codex_harness.cjs` / `claude_harness.cjs` and skip the `detect-agent-errors` host step for those engines. Rejected because it would duplicate the regex across harnesses, diverge over time, and bypass the unified `detect-agent-errors` output contract the conclusion job already consumes for inference-access, MCP-policy, and timeout errors. + +## Consequences + +### Positive +- Invalid/unknown model names now produce consistent, actionable specialized failure issues across Copilot, Codex, and Claude. +- Deterministic model-name failures no longer waste retry budget in the Codex and Claude harnesses. +- New engines can opt into error detection by implementing `GetErrorDetectionScriptId()`, with no changes to conclusion-job branching. + +### Negative +- A single broadened regex now governs classification for all participating engines, so a false positive or false negative in `MODEL_NOT_SUPPORTED_PATTERN` affects every engine at once. +- The pattern relies on textual provider error phrasing, which can change upstream and silently stop matching (or over-match unrelated text mentioning "model"). + +### Neutral +- Engines that return an empty `GetErrorDetectionScriptId()` (e.g., Gemini) intentionally remain excluded from the detection step and its outputs; tests were renamed from "non-Copilot" to "engine without detection script" to reflect the capability framing. +- Golden lock files for Codex and Claude now include the `detect-agent-errors` step and the associated job outputs. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/27248303188) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* diff --git a/pkg/workflow/claude_engine.go b/pkg/workflow/claude_engine.go index 0528f7d6daf..3fe2b077334 100644 --- a/pkg/workflow/claude_engine.go +++ b/pkg/workflow/claude_engine.go @@ -525,6 +525,12 @@ func (e *ClaudeEngine) GetLogParserScriptId() string { return "parse_claude_log" } +// GetErrorDetectionScriptId returns the JavaScript script name for detecting +// post-run agent errors from the host runner (including invalid/unsupported model names). +func (e *ClaudeEngine) GetErrorDetectionScriptId() string { + return "detect_agent_errors" +} + // GetHarnessScriptName returns the filename of the JavaScript harness script that wraps // the Claude Code CLI with retry logic for transient Anthropic API errors (overload, rate limit). func (e *ClaudeEngine) GetHarnessScriptName() string { diff --git a/pkg/workflow/codex_logs.go b/pkg/workflow/codex_logs.go index 055cac0559d..8798bd9b243 100644 --- a/pkg/workflow/codex_logs.go +++ b/pkg/workflow/codex_logs.go @@ -381,3 +381,9 @@ func (e *CodexEngine) extractCodexTokenUsage(line string) int { func (e *CodexEngine) GetLogParserScriptId() string { return "parse_codex_log" } + +// GetErrorDetectionScriptId returns the JavaScript script name for detecting +// post-run agent errors from the host runner (including invalid/unsupported model names). +func (e *CodexEngine) GetErrorDetectionScriptId() string { + return "detect_agent_errors" +} diff --git a/pkg/workflow/inference_access_error_test.go b/pkg/workflow/inference_access_error_test.go index 23f60797a46..25693d4afee 100644 --- a/pkg/workflow/inference_access_error_test.go +++ b/pkg/workflow/inference_access_error_test.go @@ -99,15 +99,15 @@ Test workflow` } } -// TestInferenceAccessErrorNotInNonCopilotEngine tests that non-Copilot engines -// do NOT include Copilot-specific error outputs. -func TestInferenceAccessErrorNotInNonCopilotEngine(t *testing.T) { - testDir := testutil.TempDir(t, "test-inference-access-error-claude-*") +// TestInferenceAccessErrorNotInEngineWithoutDetectionScript tests that engines +// without detect-agent-errors support do not include these outputs. +func TestInferenceAccessErrorNotInEngineWithoutDetectionScript(t *testing.T) { + testDir := testutil.TempDir(t, "test-inference-access-error-gemini-*") workflowFile := filepath.Join(testDir, "test-workflow.md") workflow := `--- on: workflow_dispatch -engine: claude +engine: gemini --- Test workflow` @@ -130,13 +130,13 @@ Test workflow` lockStr := string(lockContent) - // Check that non-Copilot engines do NOT have the detect-agent-errors step + // Check that engines without detection script do NOT have the detect-agent-errors step if strings.Contains(lockStr, "id: detect-agent-errors") { - t.Error("Expected non-Copilot engine to NOT have detect-agent-errors step") + t.Error("Expected engine without detection script to NOT have detect-agent-errors step") } - // Check that non-Copilot engines do NOT have the inference_access_error output + // Check that engines without detection script do NOT have the inference_access_error output if strings.Contains(lockStr, "inference_access_error:") { - t.Error("Expected non-Copilot engine to NOT have inference_access_error output") + t.Error("Expected engine without detection script to NOT have inference_access_error output") } } diff --git a/pkg/workflow/mcp_policy_error_test.go b/pkg/workflow/mcp_policy_error_test.go index 5b92cf5d502..d8d3ee61839 100644 --- a/pkg/workflow/mcp_policy_error_test.go +++ b/pkg/workflow/mcp_policy_error_test.go @@ -99,18 +99,17 @@ Test workflow` } } -// TestMCPPolicyErrorNotInNonCopilotEngine tests that non-Copilot engines -// do NOT include Copilot-specific error outputs. -func TestMCPPolicyErrorNotInNonCopilotEngine(t *testing.T) { - testDir := testutil.TempDir(t, "test-mcp-policy-error-claude-*") +// TestMCPPolicyErrorNotInEngineWithoutDetectionScript tests that engines +// without detect-agent-errors support do not include these outputs. +func TestMCPPolicyErrorNotInEngineWithoutDetectionScript(t *testing.T) { + testDir := testutil.TempDir(t, "test-mcp-policy-error-gemini-*") workflowFile := filepath.Join(testDir, "test-workflow.md") - workflow := `--- -on: workflow_dispatch -engine: claude ---- - -Test workflow` + workflow := "---\n" + + "on: workflow_dispatch\n" + + "engine: gemini\n" + + "---\n\n" + + "Test workflow" if err := os.WriteFile(workflowFile, []byte(workflow), 0644); err != nil { t.Fatalf("Failed to write test workflow: %v", err) @@ -130,13 +129,13 @@ Test workflow` lockStr := string(lockContent) - // Check that non-Copilot engines do NOT have the detect-agent-errors step + // Check that engines without detection script do NOT have the detect-agent-errors step if strings.Contains(lockStr, "id: detect-agent-errors") { - t.Error("Expected non-Copilot engine to NOT have detect-agent-errors step") + t.Error("Expected engine without detection script to NOT have detect-agent-errors step") } - // Check that non-Copilot engines do NOT have the mcp_policy_error output + // Check that engines without detection script do NOT have the mcp_policy_error output if strings.Contains(lockStr, "mcp_policy_error:") { - t.Error("Expected non-Copilot engine to NOT have mcp_policy_error output") + t.Error("Expected engine without detection script to NOT have mcp_policy_error output") } } diff --git a/pkg/workflow/model_not_supported_error_test.go b/pkg/workflow/model_not_supported_error_test.go index ef5e092effa..6a40eb078ba 100644 --- a/pkg/workflow/model_not_supported_error_test.go +++ b/pkg/workflow/model_not_supported_error_test.go @@ -12,126 +12,86 @@ import ( "github.com/github/gh-aw/pkg/testutil" ) -// TestModelNotSupportedErrorDetectionStep tests that a Copilot engine workflow exposes -// model_not_supported_error from the detect-agent-errors step. -func TestModelNotSupportedErrorDetectionStep(t *testing.T) { +func compileWorkflowAndReadLock(t *testing.T, workflow string) string { + t.Helper() testDir := testutil.TempDir(t, "test-model-not-supported-*") workflowFile := filepath.Join(testDir, "test-workflow.md") - - workflow := `--- -on: workflow_dispatch -engine: copilot ---- - -Test workflow` - if err := os.WriteFile(workflowFile, []byte(workflow), 0644); err != nil { t.Fatalf("Failed to write test workflow: %v", err) } - compiler := NewCompiler() if err := compiler.CompileWorkflow(workflowFile); err != nil { t.Fatalf("Failed to compile workflow: %v", err) } - - // Read the generated lock file lockFile := stringutil.MarkdownToLockFile(workflowFile) lockContent, err := os.ReadFile(lockFile) if err != nil { t.Fatalf("Failed to read lock file: %v", err) } + return string(lockContent) +} - lockStr := string(lockContent) - - // Check that agent job has the primary execution step - if !strings.Contains(lockStr, "id: agentic_execution") { - t.Error("Expected agent job to have agentic_execution step") - } - - // Check that a separate detection step is generated on the host runner - if !strings.Contains(lockStr, "id: detect-agent-errors") { - t.Error("Expected agent job to have a separate detect-agent-errors step") - } +// TestModelNotSupportedErrorDetectionStep tests that engines with detect-agent-errors support +// expose model_not_supported_error from that step. +func TestModelNotSupportedErrorDetectionStep(t *testing.T) { + t.Parallel() + engines := []string{"copilot", "codex", "claude"} + for _, engine := range engines { + t.Run(engine, func(t *testing.T) { + t.Parallel() + lockStr := compileWorkflowAndReadLock(t, `--- +on: workflow_dispatch +engine: `+engine+` +--- - // Check that the agent job exposes model_not_supported_error output from the detection step - if !strings.Contains(lockStr, "model_not_supported_error: ${{ steps.detect-agent-errors.outputs.model_not_supported_error || 'false' }}") { - t.Error("Expected agent job to have model_not_supported_error output from detect-agent-errors step") +Test workflow`) + if !strings.Contains(lockStr, "id: agentic_execution") { + t.Error("Expected agent job to have agentic_execution step") + } + if !strings.Contains(lockStr, "id: detect-agent-errors") { + t.Error("Expected agent job to have a separate detect-agent-errors step") + } + if !strings.Contains(lockStr, "model_not_supported_error: ${{ steps.detect-agent-errors.outputs.model_not_supported_error || 'false' }}") { + t.Error("Expected agent job to have model_not_supported_error output from detect-agent-errors step") + } + }) } } // TestModelNotSupportedErrorInConclusionJob tests that the conclusion job receives the -// model-not-supported error env var when the Copilot engine is used. +// model-not-supported error env var when the engine provides detect-agent-errors support. func TestModelNotSupportedErrorInConclusionJob(t *testing.T) { - testDir := testutil.TempDir(t, "test-model-not-supported-conclusion-*") - workflowFile := filepath.Join(testDir, "test-workflow.md") - - workflow := `--- + t.Parallel() + engines := []string{"copilot", "codex", "claude"} + for _, engine := range engines { + t.Run(engine, func(t *testing.T) { + t.Parallel() + lockStr := compileWorkflowAndReadLock(t, `--- on: workflow_dispatch -engine: copilot +engine: `+engine+` safe-outputs: add-comment: max: 5 --- -Test workflow` - - if err := os.WriteFile(workflowFile, []byte(workflow), 0644); err != nil { - t.Fatalf("Failed to write test workflow: %v", err) - } - - compiler := NewCompiler() - if err := compiler.CompileWorkflow(workflowFile); err != nil { - t.Fatalf("Failed to compile workflow: %v", err) - } - - // Read the generated lock file - lockFile := stringutil.MarkdownToLockFile(workflowFile) - lockContent, err := os.ReadFile(lockFile) - if err != nil { - t.Fatalf("Failed to read lock file: %v", err) - } - - lockStr := string(lockContent) - - // Check that conclusion job receives model not supported error from agent job - if !strings.Contains(lockStr, "GH_AW_MODEL_NOT_SUPPORTED_ERROR: ${{ needs.agent.outputs.model_not_supported_error }}") { - t.Error("Expected conclusion job to receive model_not_supported_error from agent job") +Test workflow`) + if !strings.Contains(lockStr, "GH_AW_MODEL_NOT_SUPPORTED_ERROR: ${{ needs.agent.outputs.model_not_supported_error }}") { + t.Error("Expected conclusion job to receive model_not_supported_error from agent job") + } + }) } } -// TestModelNotSupportedErrorNotInNonCopilotEngine tests that non-Copilot engines -// do NOT include the model_not_supported_error output. -func TestModelNotSupportedErrorNotInNonCopilotEngine(t *testing.T) { - testDir := testutil.TempDir(t, "test-model-not-supported-claude-*") - workflowFile := filepath.Join(testDir, "test-workflow.md") - - workflow := `--- +// TestModelNotSupportedErrorNotInEngineWithoutDetectionScript tests that engines without +// detect-agent-errors support do not include model_not_supported_error output. +func TestModelNotSupportedErrorNotInEngineWithoutDetectionScript(t *testing.T) { + lockStr := compileWorkflowAndReadLock(t, `--- on: workflow_dispatch -engine: claude +engine: gemini --- -Test workflow` - - if err := os.WriteFile(workflowFile, []byte(workflow), 0644); err != nil { - t.Fatalf("Failed to write test workflow: %v", err) - } - - compiler := NewCompiler() - if err := compiler.CompileWorkflow(workflowFile); err != nil { - t.Fatalf("Failed to compile workflow: %v", err) - } - - // Read the generated lock file - lockFile := stringutil.MarkdownToLockFile(workflowFile) - lockContent, err := os.ReadFile(lockFile) - if err != nil { - t.Fatalf("Failed to read lock file: %v", err) - } - - lockStr := string(lockContent) - - // Check that non-Copilot engines do NOT have the model_not_supported_error output +Test workflow`) if strings.Contains(lockStr, "model_not_supported_error:") { - t.Error("Expected non-Copilot engine to NOT have model_not_supported_error output") + t.Error("Expected engine without detection script to NOT have model_not_supported_error output") } } diff --git a/pkg/workflow/notify_comment.go b/pkg/workflow/notify_comment.go index ef8cc541ce2..a957e5fe348 100644 --- a/pkg/workflow/notify_comment.go +++ b/pkg/workflow/notify_comment.go @@ -274,13 +274,16 @@ func (c *Compiler) buildConclusionJob(data *WorkflowData, mainJobName string, sa agentFailureEnvVars = append(agentFailureEnvVars, fmt.Sprintf(" GH_AW_EFFECTIVE_TOKENS: ${{ needs.%s.outputs.effective_tokens || '' }}\n", mainJobName)) agentFailureEnvVars = append(agentFailureEnvVars, fmt.Sprintf(" GH_AW_AI_CREDITS_RATE_LIMIT_ERROR: ${{ needs.%s.outputs.ai_credits_rate_limit_error || 'false' }}\n", mainJobName)) - // Pass Copilot-engine-specific error detection outputs to the conclusion job. - // These are set by the copilot_harness.cjs logic in the agentic_execution step and cover: + // Pass engine error-detection outputs to the conclusion job when the selected engine + // provides a host-runner detect-agent-errors step. + // Contract: engines returning a non-empty GetErrorDetectionScriptId() must run + // actions/setup/js/detect_agent_errors.cjs, which emits all four outputs below. + // These outputs cover: // - inference_access_error: token lacks inference access // - mcp_policy_error: MCP servers blocked by enterprise/organization policy // - agentic_engine_timeout: engine process killed by signal (step timeout) - // - model_not_supported_error: requested model unavailable for the subscription tier - if _, ok := engine.(*CopilotEngine); ok { + // - model_not_supported_error: configured model name is invalid or unavailable + if engine.GetErrorDetectionScriptId() != "" { agentFailureEnvVars = append(agentFailureEnvVars, fmt.Sprintf(" GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.%s.outputs.inference_access_error }}\n", mainJobName)) agentFailureEnvVars = append(agentFailureEnvVars, fmt.Sprintf(" GH_AW_MCP_POLICY_ERROR: ${{ needs.%s.outputs.mcp_policy_error }}\n", mainJobName)) agentFailureEnvVars = append(agentFailureEnvVars, fmt.Sprintf(" GH_AW_AGENTIC_ENGINE_TIMEOUT: ${{ needs.%s.outputs.agentic_engine_timeout }}\n", mainJobName)) diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden index b87b25bd9f1..30207b404f5 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden @@ -289,12 +289,16 @@ jobs: env: GH_AW_WORKFLOW_ID_SANITIZED: workflow outputs: + agentic_engine_timeout: ${{ steps.detect-agent-errors.outputs.agentic_engine_timeout || 'false' }} ai_credits_rate_limit_error: ${{ steps.parse-mcp-gateway.outputs.ai_credits_rate_limit_error || 'false' }} aic: ${{ steps.parse-mcp-gateway.outputs.aic }} ambient_context: ${{ steps.parse-mcp-gateway.outputs.ambient_context }} checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} effective_tokens: ${{ steps.parse-mcp-gateway.outputs.effective_tokens }} + inference_access_error: ${{ steps.detect-agent-errors.outputs.inference_access_error || 'false' }} + mcp_policy_error: ${{ steps.detect-agent-errors.outputs.mcp_policy_error || 'false' }} model: ${{ needs.activation.outputs.model }} + model_not_supported_error: ${{ steps.detect-agent-errors.outputs.model_not_supported_error || 'false' }} setup-parent-span-id: ${{ steps.setup.outputs.parent-span-id || steps.setup.outputs.span-id }} setup-span-id: ${{ steps.setup.outputs.span-id }} setup-trace-id: ${{ steps.setup.outputs.trace-id }} @@ -595,6 +599,11 @@ jobs: MCP_TIMEOUT: 120000 MCP_TOOL_TIMEOUT: 60000 RUNNER_TEMP: ${{ runner.temp }} + - name: Detect agent errors + if: always() + id: detect-agent-errors + continue-on-error: true + run: node "${RUNNER_TEMP}/gh-aw/actions/detect_agent_errors.cjs" - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden index 342f4934289..ef9a2d9c519 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden @@ -290,12 +290,16 @@ jobs: env: GH_AW_WORKFLOW_ID_SANITIZED: workflow outputs: + agentic_engine_timeout: ${{ steps.detect-agent-errors.outputs.agentic_engine_timeout || 'false' }} ai_credits_rate_limit_error: ${{ steps.parse-mcp-gateway.outputs.ai_credits_rate_limit_error || 'false' }} aic: ${{ steps.parse-mcp-gateway.outputs.aic }} ambient_context: ${{ steps.parse-mcp-gateway.outputs.ambient_context }} checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} effective_tokens: ${{ steps.parse-mcp-gateway.outputs.effective_tokens }} + inference_access_error: ${{ steps.detect-agent-errors.outputs.inference_access_error || 'false' }} + mcp_policy_error: ${{ steps.detect-agent-errors.outputs.mcp_policy_error || 'false' }} model: ${{ needs.activation.outputs.model }} + model_not_supported_error: ${{ steps.detect-agent-errors.outputs.model_not_supported_error || 'false' }} setup-parent-span-id: ${{ steps.setup.outputs.parent-span-id || steps.setup.outputs.span-id }} setup-span-id: ${{ steps.setup.outputs.span-id }} setup-trace-id: ${{ steps.setup.outputs.trace-id }} @@ -559,6 +563,11 @@ jobs: OPENAI_API_KEY: ${{ secrets.CODEX_API_KEY || secrets.OPENAI_API_KEY }} RUNNER_TEMP: ${{ runner.temp }} RUST_LOG: ${{ runner.debug == 1 && 'trace,hyper_util=info,mio=info,reqwest=info,os_info=info,codex_otel=warn,codex_core=debug,ocodex_exec=debug' || 'warn' }} + - name: Detect agent errors + if: always() + id: detect-agent-errors + continue-on-error: true + run: node "${RUNNER_TEMP}/gh-aw/actions/detect_agent_errors.cjs" - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }}