Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions actions/setup/js/claude_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const {
const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");

// Maximum number of retry attempts after the initial run
const MAX_RETRIES = 3;
Expand Down Expand Up @@ -148,6 +149,15 @@ function isNoDeferredMarkerError(output) {
return NO_DEFERRED_MARKER_PATTERN.test(output);
}

/**
 * Reports whether the captured process output contains an invalid-model error.
 *
 * The check is a single regex test against INVALID_MODEL_ERROR_PATTERN, which is
 * the MODEL_NOT_SUPPORTED_PATTERN exported by ./detect_agent_errors.cjs, so this
 * harness and the host-side detection step classify the same text the same way.
 *
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the pattern matches anywhere in the output
 */
function isInvalidModelError(output) {
  const matchesInvalidModel = INVALID_MODEL_ERROR_PATTERN.test(output);
  return matchesInvalidModel;
}

/**
* Determines whether the exit code corresponds to signal-style termination
* (SIGKILL=137 / SIGTERM=143), typically from timeout/cancellation.
Expand Down Expand Up @@ -372,6 +382,7 @@ async function main() {
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const isMaxTurns = isMaxTurnsExit(result.output);
const isNoDeferredMarker = isNoDeferredMarkerError(result.output);
const isInvalidModel = isInvalidModelError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
log(
Expand All @@ -382,6 +393,7 @@ async function main() {
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` isMaxTurnsExit=${isMaxTurns}` +
` isNoDeferredMarkerError=${isNoDeferredMarker}` +
` isInvalidModelError=${isInvalidModel}` +
` permissionDeniedCount=${permissionDeniedCount}` +
` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
` hasOutput=${result.hasOutput}` +
Expand Down Expand Up @@ -411,6 +423,11 @@ async function main() {
break;
}

if (isInvalidModel) {
log(`attempt ${attempt + 1}: invalid/unsupported model configuration — not retrying (specify a valid engine model name in workflow frontmatter)`);
break;
}

if (hasNumerousPermissionDenied) {
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
Expand Down Expand Up @@ -494,6 +511,7 @@ if (typeof module !== "undefined" && module.exports) {
isAuthenticationFailedError,
isMaxTurnsExit,
isNoDeferredMarkerError,
isInvalidModelError,
isSignalTerminationExitCode,
shouldRetryWithContinue,
countPermissionDeniedIssues,
Expand Down
22 changes: 22 additions & 0 deletions actions/setup/js/claude_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const {
isAuthenticationFailedError,
isMaxTurnsExit,
isNoDeferredMarkerError,
isInvalidModelError,
isSignalTerminationExitCode,
shouldRetryWithContinue,
countPermissionDeniedIssues,
Expand Down Expand Up @@ -187,6 +188,27 @@ describe("claude_harness.cjs", () => {
expect(isAuthenticationFailedError("Authentication failed (Request ID: C818:3ED713:19D401B:1C446B7:69D653CA)")).toBe(true);
});

describe("isInvalidModelError", () => {
it("returns true for model-not-supported errors", () => {
expect(isInvalidModelError("Execution failed: CAPIError: 400 The requested model is not supported.")).toBe(true);
});

it("returns true for invalid model name errors", () => {
expect(isInvalidModelError("invalid model name 'claude-sonnet-999'")).toBe(true);
expect(isInvalidModelError("model 'claude-ultra' does not exist")).toBe(true);
expect(isInvalidModelError("model claude-fake is not supported")).toBe(true);
expect(isInvalidModelError("model gemini-v99 is unavailable")).toBe(true);
expect(isInvalidModelError("model 'claude-3-5-sonnet@20241022' not found")).toBe(true);
});

it("returns false for unrelated errors", () => {
expect(isInvalidModelError("rate_limit_error")).toBe(false);
expect(isInvalidModelError("Error: invalid model response format")).toBe(false);
expect(isInvalidModelError('{"type":"result","subtype":"error_max_turns","is_error":true}')).toBe(false);
expect(isInvalidModelError("")).toBe(false);
});
});

it("returns false for unrelated output", () => {
expect(isAuthenticationFailedError("No authentication information found")).toBe(false);
expect(isAuthenticationFailedError("rate_limit_error")).toBe(false);
Expand Down
18 changes: 18 additions & 0 deletions actions/setup/js/codex_harness.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const {
const { emitMissingToolPermissionIssue, hasNoopInSafeOutputs } = require("./safeoutputs_cli.cjs");
const { countPermissionDeniedIssues, hasNumerousPermissionDeniedIssues, extractDeniedCommands, buildMissingToolPermissionIssuePayload } = require("./permission_denied_helpers.cjs");
const { detectNonRetryableHarnessGuard } = require("./harness_retry_guard.cjs");
const { MODEL_NOT_SUPPORTED_PATTERN: INVALID_MODEL_ERROR_PATTERN } = require("./detect_agent_errors.cjs");

// Maximum number of retry attempts after the initial run
const MAX_RETRIES = 3;
Expand Down Expand Up @@ -120,6 +121,15 @@ function isServerError(output) {
return SERVER_ERROR_PATTERN.test(output);
}

/**
 * Reports whether the captured Codex output contains an invalid-model error.
 *
 * The check is a single regex test against INVALID_MODEL_ERROR_PATTERN, which is
 * the MODEL_NOT_SUPPORTED_PATTERN exported by ./detect_agent_errors.cjs, so this
 * harness and the host-side detection step classify the same text the same way.
 *
 * @param {string} output - Collected stdout+stderr from the process
 * @returns {boolean} true when the pattern matches anywhere in the output
 */
function isInvalidModelError(output) {
  const matchesInvalidModel = INVALID_MODEL_ERROR_PATTERN.test(output);
  return matchesInvalidModel;
}

/**
* Resolve --prompt-file arguments for the Codex run.
* Strips the --prompt-file <path> pair from args and appends the file content
Expand Down Expand Up @@ -402,6 +412,7 @@ async function main() {
const isAuthenticationFailed = isAuthenticationFailedError(result.output);
const isMissingApiKey = isMissingApiKeyError(result.output);
const isServer = isServerError(result.output);
const isInvalidModel = isInvalidModelError(result.output);
const permissionDeniedCount = countPermissionDeniedIssues(result.output);
const hasNumerousPermissionDenied = hasNumerousPermissionDeniedIssues(result.output);
log(
Expand All @@ -411,6 +422,7 @@ async function main() {
` isAuthenticationFailedError=${isAuthenticationFailed}` +
` isMissingApiKeyError=${isMissingApiKey}` +
` isServerError=${isServer}` +
` isInvalidModelError=${isInvalidModel}` +
` permissionDeniedCount=${permissionDeniedCount}` +
` hasNumerousPermissionDenied=${hasNumerousPermissionDenied}` +
` hasOutput=${result.hasOutput}` +
Expand Down Expand Up @@ -445,6 +457,11 @@ async function main() {
break;
}

if (isInvalidModel) {
log(`attempt ${attempt + 1}: invalid/unsupported model configuration — not retrying (specify a valid engine model name in workflow frontmatter)`);
break;
}

if (hasNumerousPermissionDenied) {
const deniedCommands = extractDeniedCommands(result.output);
emitMissingToolPermissionIssue({ deniedCommands, logger: log });
Expand Down Expand Up @@ -485,6 +502,7 @@ if (typeof module !== "undefined" && module.exports) {
isAuthenticationFailedError,
isMissingApiKeyError,
isServerError,
isInvalidModelError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
extractDeniedCommands,
Expand Down
21 changes: 21 additions & 0 deletions actions/setup/js/codex_harness.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const {
isAuthenticationFailedError,
isMissingApiKeyError,
isServerError,
isInvalidModelError,
countPermissionDeniedIssues,
hasNumerousPermissionDeniedIssues,
extractDeniedCommands,
Expand Down Expand Up @@ -272,6 +273,26 @@ env_key = "OPENAI_API_KEY"
expect(isServerError("InternalServerError: The server had an error processing your request")).toBe(true);
});

describe("isInvalidModelError", () => {
it("returns true for model-not-supported errors", () => {
expect(isInvalidModelError("Execution failed: CAPIError: 400 The requested model is not supported.")).toBe(true);
});

it("returns true for invalid model name errors", () => {
expect(isInvalidModelError("invalid model name 'claude-sonnet-999'")).toBe(true);
expect(isInvalidModelError("model 'gpt-foo' not found")).toBe(true);
expect(isInvalidModelError("model gpt-unknown is not available")).toBe(true);
expect(isInvalidModelError("model 'claude-3-5-sonnet@20241022' not found")).toBe(true);
});

it("returns false for unrelated errors", () => {
expect(isInvalidModelError("rate_limit_exceeded")).toBe(false);
expect(isInvalidModelError("unknown model behavior detected")).toBe(false);
expect(isInvalidModelError("ServiceUnavailableError")).toBe(false);
expect(isInvalidModelError("")).toBe(false);
});
});

it("returns true for ServiceUnavailableError", () => {
expect(isServerError("ServiceUnavailableError: The server is temporarily unable to service your request")).toBe(true);
});
Expand Down
27 changes: 17 additions & 10 deletions actions/setup/js/detect_agent_errors.cjs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// @ts-check

/**
* Detect Copilot CLI errors in the agent stdio log.
* Detect agent engine errors in the agent stdio log.
*
* Scans the agent stdio log for known error patterns and sets GitHub Actions
* output variables for each detected error class:
Expand All @@ -13,8 +13,9 @@
* - agentic_engine_timeout: The agentic engine process was killed by a
* signal (SIGTERM/SIGKILL/SIGINT), typically due to the step
* timeout-minutes limit being reached.
* - model_not_supported_error: The requested model is not supported for
* the user's Copilot subscription tier (e.g., Copilot Pro/Education).
* - model_not_supported_error: The configured model is invalid or unsupported
* for the selected engine/account (for example unknown model name, model not
* found, or model unavailable for the plan).
*
* This replaces the individual bash scripts (detect_inference_access_error.sh,
* detect_mcp_policy_error.sh) with a single JavaScript step.
Expand Down Expand Up @@ -44,11 +45,15 @@ const MCP_POLICY_BLOCKED_PATTERN = /MCP servers were blocked by policy:/;
// making it engine-agnostic.
const AGENTIC_ENGINE_TIMEOUT_PATTERN = /signal=SIG(?:TERM|KILL|INT)/;

// Pattern: Requested model is not supported for the user's subscription tier.
// This occurs when Copilot Pro/Education users attempt to use a model that is
// not available for their plan. The full error from the Copilot CLI is:
// Execution failed: CAPIError: 400 The requested model is not supported.
const MODEL_NOT_SUPPORTED_PATTERN = /The requested model is not supported/;
// Pattern: Configured model is invalid or unavailable.
// Case-insensitive (`i` flag); there is no `g` flag, so `.test()` keeps no
// state between calls and the same RegExp object can be shared across modules.
//
// <id> below is a model identifier made of letters, digits and . _ : / @ -,
// optionally wrapped in a single ', " or ` quote. Four alternatives are accepted:
// 1. "The requested model is not supported" (literal phrase)
// 2. "invalid model <id>" or "invalid model name <id>"
// 3. "unknown model <id>"
// 4. "model <id>" or "model name <id>", followed by an optional "is" and one of:
//    "not found", "does not exist", "not supported", "not available", "unavailable"
//
// Alternatives 2 and 3 end with a lookahead: <id> must be followed by only
// whitespace up to the end of the input, or by a line break or one of . , ; : ! ? )
// This keeps ordinary phrases such as "invalid model response format" or
// "unknown model behavior detected" from matching.
// NOTE(review): a single trailing word still satisfies the lookahead, so
// "unknown model behavior" at the very end of the input does match.
const MODEL_NOT_SUPPORTED_PATTERN =
/(?:The requested model is not supported|invalid model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|unknown model\s+['"`]?[a-z0-9._:/@-]+['"`]?(?=(?:\s*$|\s*[\n\r.,;:!?)]))|model(?:\s+name)?\s+['"`]?[a-z0-9._:/@-]+['"`]?\s+(?:is\s+)?(?:not found|does not exist|not supported|not available|unavailable))/i;

/**
* Detect known error patterns in a log string and return detection results.
Expand Down Expand Up @@ -105,12 +110,14 @@ function main() {
process.stderr.write("[detect-agent-errors] Detected timeout: engine process was killed by signal (step timeout-minutes likely exceeded)\n");
}
if (results.modelNotSupportedError) {
process.stderr.write("[detect-agent-errors] Detected model-not-supported error: the requested model is unavailable for this subscription tier\n");
process.stderr.write("[detect-agent-errors] Detected model configuration error: configured model is invalid or unavailable for this engine/account\n");
}

writeOutputs(results);
}

main();
if (require.main === module) {
main();
}

module.exports = { detectErrors, INFERENCE_ACCESS_ERROR_PATTERN, MCP_POLICY_BLOCKED_PATTERN, AGENTIC_ENGINE_TIMEOUT_PATTERN, MODEL_NOT_SUPPORTED_PATTERN };
25 changes: 25 additions & 0 deletions actions/setup/js/detect_agent_errors.test.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,23 @@ describe("detect_agent_errors.cjs", () => {
expect(MODEL_NOT_SUPPORTED_PATTERN.test(log)).toBe(true);
});

it("matches invalid/unknown model name variants", () => {
expect(MODEL_NOT_SUPPORTED_PATTERN.test("invalid model name 'claude-sonnet-999'")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("unknown model gpt-unknown")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("model 'gpt-foo' not found")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("model 'claude-ultra' does not exist")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("model claude-fake is not supported")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("model gpt-unknown is not available")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("model gemini-v99 is unavailable")).toBe(true);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("model 'claude-3-5-sonnet@20241022' not found")).toBe(true);
});

it("does not match unrelated invalid/unknown model wording", () => {
expect(MODEL_NOT_SUPPORTED_PATTERN.test("Error: invalid model response format")).toBe(false);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("Error: invalid model schema definition")).toBe(false);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("unknown model behavior detected")).toBe(false);
});

it("does not match other CAPIError 400 errors", () => {
expect(MODEL_NOT_SUPPORTED_PATTERN.test("CAPIError: 400 Bad Request")).toBe(false);
expect(MODEL_NOT_SUPPORTED_PATTERN.test("CAPIError: 400 400 Bad Request")).toBe(false);
Expand Down Expand Up @@ -150,6 +167,14 @@ describe("detect_agent_errors.cjs", () => {
expect(result.modelNotSupportedError).toBe(true);
});

it("detects invalid model name errors", () => {
const result = detectErrors("Error: invalid model name 'claude-sonnet-999'");
expect(result.inferenceAccessError).toBe(false);
expect(result.mcpPolicyError).toBe(false);
expect(result.agenticEngineTimeout).toBe(false);
expect(result.modelNotSupportedError).toBe(true);
});

it("detects both errors in the same log", () => {
const log = "Access denied by policy settings\nMCP servers were blocked by policy: 'github'";
const result = detectErrors(log);
Expand Down
6 changes: 3 additions & 3 deletions actions/setup/md/model_not_supported_error.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
> [!WARNING]
> **Model Not Supported**: The Copilot CLI failed because the requested model is not available for your subscription tier. This typically affects Copilot Pro and Education users.
> **Invalid or Unsupported Model**: The agent failed because the configured model name is invalid, unknown, or unavailable for this engine/account.

This is a **configuration issue**, not a transient error — retrying will not help.

<details>
<summary>How to fix this</summary>

Specify a model that is supported by your subscription in the workflow frontmatter:
Specify a valid model for the selected engine in the workflow frontmatter:

```yaml
---
Expand All @@ -15,6 +15,6 @@ model: gpt-5-mini
---
```

To find the models available for your account, check your [Copilot settings](https://github.com/settings/copilot) or refer to the [supported models documentation](https://docs.github.com/en/copilot/using-github-copilot/using-github-copilot-in-the-command-line#supported-models).
To find valid models, check your engine/provider documentation (for Copilot see [supported models](https://docs.github.com/en/copilot/using-github-copilot/using-github-copilot-in-the-command-line#supported-models)).

</details>
45 changes: 45 additions & 0 deletions docs/adr/38258-engine-agnostic-agent-error-detection.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# ADR-38258: Engine-Agnostic Agent Error Detection via Capability Method

**Date**: 2026-06-10
**Status**: Draft

## Context

Agentic workflows run on one of several engines (Copilot, Codex, Claude, Gemini). When a workflow specifies a model name that the selected engine cannot serve, the run fails. Previously, host-side post-run error classification (the `detect-agent-errors` step) and the specialized "model not supported" conclusion messaging were wired only for the Copilot engine via a concrete `engine.(*CopilotEngine)` type assertion, and the model-error pattern matched only the single Copilot subscription-tier string `The requested model is not supported`. As a result, invalid or unknown model names on Codex and Claude — and alternative error phrasings such as "unknown model" or "model … does not exist" — fell through to generic failures and triggered wasteful harness retries on a deterministic, non-recoverable misconfiguration.

## Decision

We will detect invalid/unknown model configuration in an engine-agnostic way and route it into the existing specialized conclusion-failure path. Concretely:

1. Introduce a `GetErrorDetectionScriptId()` capability method on engines; engines that support host-side error detection return `detect_agent_errors`, and the conclusion-job wiring in `notify_comment.go` gates on `engine.GetErrorDetectionScriptId() != ""` instead of asserting the concrete `*CopilotEngine` type.
2. Broaden the shared `MODEL_NOT_SUPPORTED_PATTERN` in `detect_agent_errors.cjs` to cover invalid/unknown model-name variants, and have `codex_harness.cjs` and `claude_harness.cjs` import that same pattern (as `INVALID_MODEL_ERROR_PATTERN`) rather than define their own copies, while keeping the stable `model_not_supported_error=true` output contract.
3. Classify invalid-model errors as non-retryable in the Codex and Claude harnesses so deterministic model-name failures break out of the retry loop immediately.

This favors a capability-based interface over per-engine type checks so additional engines can opt in without touching the conclusion-job branching logic.

## Alternatives Considered

### Alternative 1: Keep the Copilot-only type assertion and add per-engine branches
Extend the existing `if _, ok := engine.(*CopilotEngine); ok` check to also test for `*CodexEngine` and `*ClaudeEngine`. Rejected because it hardcodes the engine list into the conclusion-job builder, must be edited for every new engine, and couples generic wiring to concrete engine types rather than a declared capability.

### Alternative 2: Detect model errors only inside each harness, no shared host-side step
Handle invalid-model classification entirely within `codex_harness.cjs` / `claude_harness.cjs` and skip the `detect-agent-errors` host step for those engines. Rejected because it would duplicate the regex across harnesses, diverge over time, and bypass the unified `detect-agent-errors` output contract the conclusion job already consumes for inference-access, MCP-policy, and timeout errors.

## Consequences

### Positive
- Invalid/unknown model names now produce consistent, actionable specialized failure issues across Copilot, Codex, and Claude.
- Deterministic model-name failures no longer waste retry budget in the Codex and Claude harnesses.
- New engines can opt into error detection by implementing `GetErrorDetectionScriptId()`, with no changes to conclusion-job branching.

### Negative
- A single broadened regex now governs classification for all participating engines, so a false positive or false negative in `MODEL_NOT_SUPPORTED_PATTERN` affects every engine at once.
- The pattern relies on textual provider error phrasing, which can change upstream and silently stop matching (or over-match unrelated text mentioning "model").

### Neutral
- Engines whose `GetErrorDetectionScriptId()` returns an empty string (e.g., Gemini) intentionally remain excluded from the detection step and its outputs; tests were renamed from "non-Copilot" to "engine without detection script" to reflect the capability framing.
- Golden lock files for Codex and Claude now include the `detect-agent-errors` step and the associated job outputs.

---

*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/27248303188) workflow. The PR author must review, complete, and finalize this document before the PR can merge.*
Loading