diff --git a/pkg/workflow/awf_config_test.go b/pkg/workflow/awf_config_test.go index 9aca201ba51..15e121ef2a9 100644 --- a/pkg/workflow/awf_config_test.go +++ b/pkg/workflow/awf_config_test.go @@ -609,6 +609,29 @@ func TestBuildAWFCommand_UsesConfigFile(t *testing.T) { assert.Contains(t, command, `"enabled":true`, "config JSON should have apiProxy enabled") } +func TestBuildAWFCommand_IncludesSquidStartupRetryAndLogs(t *testing.T) { + config := AWFCommandConfig{ + EngineName: "copilot", + EngineCommand: "copilot --prompt-file /tmp/prompt.txt", + LogFile: "/tmp/gh-aw/agent-stdio.log", + AllowedDomains: "github.com", + WorkflowData: &WorkflowData{ + EngineConfig: &EngineConfig{ID: "copilot"}, + NetworkPermissions: &NetworkPermissions{ + Firewall: &FirewallConfig{Enabled: true}, + }, + }, + } + + command := BuildAWFCommand(config) + + assert.Contains(t, command, "awf_bootstrap_retry_max=3", "expected retry max for AWF startup") + assert.Contains(t, command, "awf_bootstrap_retry_delay=5", "expected retry backoff for AWF startup") + assert.Contains(t, command, "dependency failed to start: container awf-squid is unhealthy", "expected squid healthcheck retry match") + assert.Contains(t, command, "docker compose up -d --pull never", "expected compose failure retry match") + assert.Contains(t, command, "docker logs awf-squid", "expected squid logs capture on final failure") +} + func TestBuildAWFCommand_PreservesGitHubExpressionOperatorsInConfigJSON(t *testing.T) { config := AWFCommandConfig{ EngineName: "copilot", diff --git a/pkg/workflow/awf_helpers.go b/pkg/workflow/awf_helpers.go index 0009ef80066..061c942b813 100644 --- a/pkg/workflow/awf_helpers.go +++ b/pkg/workflow/awf_helpers.go @@ -265,6 +265,44 @@ fi`, // Build the complete command with proper formatting. // configFileSetup (if non-empty) writes the AWF config JSON immediately before the // AWF invocation so the file is present when AWF parses --config. + awfRunWithRetry := fmt.Sprintf(`awf_bootstrap_retry_max=3 +awf_bootstrap_retry_delay=5 +awf_bootstrap_attempt=1 +while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + %s %s %s %s \ + -- %s 2>&1 | tee "$awf_attempt_log" | tee -a %s + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a %s + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a %s + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a %s || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" +done`, + awfCommand, + expandableArgs, + arcDindPrefixArgsRef, + shellJoinArgs(awfArgs), + shellWrappedCommand, + shellEscapeArg(config.LogFile), + shellEscapeArg(config.LogFile), + shellEscapeArg(config.LogFile), + shellEscapeArg(config.LogFile)) + var command string if config.PathSetup != "" && configFileSetup != "" { command = fmt.Sprintf(`set -o pipefail @@ -273,20 +311,13 @@ fi`, %s %s %s -# shellcheck disable=SC1003 -%s %s %s %s \ - -- %s 2>&1 | tee -a %s`, +%s`, writeAgentCLIStartMs, config.PathSetup, preCreateLog, configFileSetup, arcDindPrefixProbe, - awfCommand, - expandableArgs, - arcDindPrefixArgsRef, - shellJoinArgs(awfArgs), - shellWrappedCommand, - shellEscapeArg(config.LogFile)) + awfRunWithRetry) } else if config.PathSetup != "" { // Include path setup before AWF command (runs on host before AWF) command = fmt.Sprintf(`set -o pipefail @@ -294,55 +325,34 @@ fi`, %s %s %s -# shellcheck disable=SC1003 -%s %s %s %s \ - -- %s 2>&1 | tee -a %s`, +%s`, writeAgentCLIStartMs, config.PathSetup, preCreateLog, arcDindPrefixProbe, - awfCommand, - expandableArgs, - arcDindPrefixArgsRef, - shellJoinArgs(awfArgs), - shellWrappedCommand, - shellEscapeArg(config.LogFile)) + awfRunWithRetry) } else if configFileSetup != "" { command = fmt.Sprintf(`set -o pipefail %s %s %s %s -# shellcheck disable=SC1003 -%s %s %s %s \ - -- %s 2>&1 | tee -a %s`, +%s`, writeAgentCLIStartMs, preCreateLog, configFileSetup, arcDindPrefixProbe, - awfCommand, - expandableArgs, - arcDindPrefixArgsRef, - shellJoinArgs(awfArgs), - shellWrappedCommand, - shellEscapeArg(config.LogFile)) + awfRunWithRetry) } else { command = fmt.Sprintf(`set -o pipefail %s %s %s -# shellcheck disable=SC1003 -%s %s %s %s \ - -- %s 2>&1 | tee -a %s`, +%s`, writeAgentCLIStartMs, preCreateLog, arcDindPrefixProbe, - awfCommand, - expandableArgs, - arcDindPrefixArgsRef, - shellJoinArgs(awfArgs), - shellWrappedCommand, - shellEscapeArg(config.LogFile)) + awfRunWithRetry) } awfHelpersLog.Print("Successfully built AWF command") diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden index 65b571c5267..4c4578d76cf 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/claude.golden @@ -523,9 +523,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --tty --env-all --exclude-env ANTHROPIC_API_KEY --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/claude_harness.cjs claude --print --no-chrome --allowed-tools '\''Bash,BashOutput,Edit,Edit(/tmp/*),Edit(/tmp/gh-aw/agent/*),ExitPlanMode,Glob,Grep,KillBash,LS,MultiEdit,MultiEdit(/tmp/*),MultiEdit(/tmp/gh-aw/agent/*),NotebookEdit,NotebookRead,Read,Read(/tmp/*),Read(/tmp/gh-aw/agent/*),Task,TodoWrite,Write,Write(/tmp/*),Write(/tmp/gh-aw/agent/*),mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_job_logs,mcp__github__get_label,mcp__github__get_latest_release,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_review_comments,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_release_by_tag,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__issue_read,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issue_types,mcp__github__list_issues,mcp__github__list_label,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_releases,mcp__github__list_secret_scanning_alerts,mcp__github__list_starred_repositories,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__pull_request_read,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users'\'' --debug-file /tmp/gh-aw/agent-stdio.log --verbose --permission-mode acceptEdits --output-format stream-json --mcp-config "${RUNNER_TEMP}/gh-aw/mcp-config/mcp-servers.json" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt${GH_AW_MODEL_DETECTION_CLAUDE:+ --model "$GH_AW_MODEL_DETECTION_CLAUDE"}' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --tty --env-all --exclude-env ANTHROPIC_API_KEY --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/claude_harness.cjs claude --print --no-chrome --allowed-tools '\''Bash,BashOutput,Edit,Edit(/tmp/*),Edit(/tmp/gh-aw/agent/*),ExitPlanMode,Glob,Grep,KillBash,LS,MultiEdit,MultiEdit(/tmp/*),MultiEdit(/tmp/gh-aw/agent/*),NotebookEdit,NotebookRead,Read,Read(/tmp/*),Read(/tmp/gh-aw/agent/*),Task,TodoWrite,Write,Write(/tmp/*),Write(/tmp/gh-aw/agent/*),mcp__github__download_workflow_run_artifact,mcp__github__get_code_scanning_alert,mcp__github__get_commit,mcp__github__get_dependabot_alert,mcp__github__get_discussion,mcp__github__get_discussion_comments,mcp__github__get_file_contents,mcp__github__get_job_logs,mcp__github__get_label,mcp__github__get_latest_release,mcp__github__get_me,mcp__github__get_notification_details,mcp__github__get_pull_request,mcp__github__get_pull_request_comments,mcp__github__get_pull_request_diff,mcp__github__get_pull_request_files,mcp__github__get_pull_request_review_comments,mcp__github__get_pull_request_reviews,mcp__github__get_pull_request_status,mcp__github__get_release_by_tag,mcp__github__get_secret_scanning_alert,mcp__github__get_tag,mcp__github__get_workflow_run,mcp__github__get_workflow_run_logs,mcp__github__get_workflow_run_usage,mcp__github__issue_read,mcp__github__list_branches,mcp__github__list_code_scanning_alerts,mcp__github__list_commits,mcp__github__list_dependabot_alerts,mcp__github__list_discussion_categories,mcp__github__list_discussions,mcp__github__list_issue_types,mcp__github__list_issues,mcp__github__list_label,mcp__github__list_notifications,mcp__github__list_pull_requests,mcp__github__list_releases,mcp__github__list_secret_scanning_alerts,mcp__github__list_starred_repositories,mcp__github__list_tags,mcp__github__list_workflow_jobs,mcp__github__list_workflow_run_artifacts,mcp__github__list_workflow_runs,mcp__github__list_workflows,mcp__github__pull_request_read,mcp__github__search_code,mcp__github__search_issues,mcp__github__search_orgs,mcp__github__search_pull_requests,mcp__github__search_repositories,mcp__github__search_users'\'' --debug-file /tmp/gh-aw/agent-stdio.log --verbose --permission-mode acceptEdits --output-format stream-json --mcp-config "${RUNNER_TEMP}/gh-aw/mcp-config/mcp-servers.json" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt${GH_AW_MODEL_DETECTION_CLAUDE:+ --model "$GH_AW_MODEL_DETECTION_CLAUDE"}' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} BASH_DEFAULT_TIMEOUT_MS: 60000 diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden index 49164933bdd..702455643c8 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/codex.golden @@ -493,9 +493,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/codex_harness.cjs codex exec${GH_AW_MODEL_DETECTION_CODEX:+ --model "$GH_AW_MODEL_DETECTION_CODEX"} -c web_search="disabled" --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/codex_harness.cjs codex exec${GH_AW_MODEL_DETECTION_CODEX:+ --model "$GH_AW_MODEL_DETECTION_CODEX"} -c web_search="disabled" --dangerously-bypass-approvals-and-sandbox --skip-git-repo-check --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: CODEX_API_KEY: ${{ secrets.CODEX_API_KEY || secrets.OPENAI_API_KEY }} CODEX_HOME: /tmp/gh-aw/mcp-config diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/copilot.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/copilot.golden index 61946ab7f31..fe1c71cee51 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/copilot.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/copilot.golden @@ -454,9 +454,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: AWF_REFLECT_ENABLED: 1 COPILOT_AGENT_RUNNER_TYPE: STANDALONE diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/gemini.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/gemini.golden index 8bd4f6158c5..a677ed90c8f 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/gemini.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/gemini.golden @@ -455,9 +455,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env GEMINI_API_KEY --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && gemini --yolo --skip-trust --output-format stream-json --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env GEMINI_API_KEY --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && gemini --yolo --skip-trust --output-format stream-json --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: DEBUG: gemini-cli:* GEMINI_API_BASE_URL: http://host.docker.internal:10003 diff --git a/pkg/workflow/testdata/TestWasmGolden_AllEngines/pi.golden b/pkg/workflow/testdata/TestWasmGolden_AllEngines/pi.golden index f7ec3064277..02d2abb6d53 100644 --- a/pkg/workflow/testdata/TestWasmGolden_AllEngines/pi.golden +++ b/pkg/workflow/testdata/TestWasmGolden_AllEngines/pi.golden @@ -391,9 +391,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GH_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull --difc-proxy-host host.docker.internal:18443 --difc-proxy-ca-cert /tmp/gh-aw/difc-proxy-tls/ca.crt \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && cat /tmp/gh-aw/aw-prompts/prompt.txt | pi --print --mode json --no-session --extension "${RUNNER_TEMP}/gh-aw/actions/pi_provider.cjs" --extension "${RUNNER_TEMP}/gh-aw/actions/pi_steering_extension.cjs" 2>&1 | tee /tmp/gh-aw/pi-streaming.jsonl' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GH_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull --difc-proxy-host host.docker.internal:18443 --difc-proxy-ca-cert /tmp/gh-aw/difc-proxy-tls/ca.crt \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && cat /tmp/gh-aw/aw-prompts/prompt.txt | pi --print --mode json --no-session --extension "${RUNNER_TEMP}/gh-aw/actions/pi_provider.cjs" --extension "${RUNNER_TEMP}/gh-aw/actions/pi_steering_extension.cjs" 2>&1 | tee /tmp/gh-aw/pi-streaming.jsonl' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: AWF_REFLECT_ENABLED: 1 COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} diff --git a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/basic-copilot.golden b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/basic-copilot.golden index 4439776c2d9..565cc1369a2 100644 --- a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/basic-copilot.golden +++ b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/basic-copilot.golden @@ -454,9 +454,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: AWF_REFLECT_ENABLED: 1 COPILOT_AGENT_RUNNER_TYPE: STANDALONE diff --git a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/playwright-cli-mode.golden b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/playwright-cli-mode.golden index f881d2fced4..a8e75d5c769 100644 --- a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/playwright-cli-mode.golden +++ b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/playwright-cli-mode.golden @@ -470,9 +470,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: AWF_REFLECT_ENABLED: 1 COPILOT_AGENT_RUNNER_TYPE: STANDALONE diff --git a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/smoke-copilot.golden b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/smoke-copilot.golden index 8eb0442fc26..c53380683bd 100644 --- a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/smoke-copilot.golden +++ b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/smoke-copilot.golden @@ -713,9 +713,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: AWF_REFLECT_ENABLED: 1 COPILOT_AGENT_RUNNER_TYPE: STANDALONE diff --git a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/with-imports.golden b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/with-imports.golden index 1aa6aaa0209..c16ba1f6def 100644 --- a/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/with-imports.golden +++ b/pkg/workflow/testdata/TestWasmGolden_CompileFixtures/with-imports.golden @@ -455,9 +455,34 @@ jobs: if [[ "${DOCKER_HOST:-}" =~ ^tcp:// ]]; then GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS="--docker-host-path-prefix /tmp/gh-aw" fi - # shellcheck disable=SC1003 - sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ - -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + awf_bootstrap_retry_max=3 + awf_bootstrap_retry_delay=5 + awf_bootstrap_attempt=1 + while true; do + awf_attempt_log=$(umask 177 && mktemp "${RUNNER_TEMP:-/tmp}/awf-startup-XXXXXX.log") + # shellcheck disable=SC1003 + sudo -E awf --config "${RUNNER_TEMP}/gh-aw/awf-config.json" --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" ${GH_AW_DOCKER_HOST_PATH_PREFIX_ARGS} --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --allow-host-ports 80,443,8080 --skip-pull \ + -- /bin/bash -c 'export PATH="$(find /opt/hostedtoolcache /home/runner/work/_tool -maxdepth 5 -type d -name bin 2>/dev/null | tr '\''\n'\'' '\'':'\'')$PATH"; [ -n "$GOROOT" ] && export PATH="$GOROOT/bin:$PATH" || true && GH_AW_NODE_EXEC="${GH_AW_NODE_BIN:-}"; if [ -z "$GH_AW_NODE_EXEC" ] || [ ! -x "$GH_AW_NODE_EXEC" ]; then GH_AW_NODE_EXEC="$(command -v node 2>/dev/null || true)"; fi; if [ -z "$GH_AW_NODE_EXEC" ]; then echo "node runtime missing on this runner — check runtimes.node in workflow YAML" >&2; exit 127; fi; "$GH_AW_NODE_EXEC" ${RUNNER_TEMP}/gh-aw/actions/copilot_harness.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt-file /tmp/gh-aw/aw-prompts/prompt.txt' 2>&1 | tee "$awf_attempt_log" | tee -a /tmp/gh-aw/agent-stdio.log + awf_exit=${PIPESTATUS[0]} + if [[ $awf_exit -eq 0 ]]; then + rm -f "$awf_attempt_log" + break + fi + if grep -Fq "dependency failed to start: container awf-squid is unhealthy" "$awf_attempt_log" || \ + (grep -Fq "Failed to start containers:" "$awf_attempt_log" && grep -Fq "docker compose up -d --pull never" "$awf_attempt_log"); then + if [[ $awf_bootstrap_attempt -lt $awf_bootstrap_retry_max ]]; then + echo "[WARN] AWF startup failed due to awf-squid healthcheck (attempt $awf_bootstrap_attempt/$awf_bootstrap_retry_max); retrying in ${awf_bootstrap_retry_delay}s..." | tee -a /tmp/gh-aw/agent-stdio.log + rm -f "$awf_attempt_log" + sleep "$awf_bootstrap_retry_delay" + awf_bootstrap_attempt=$((awf_bootstrap_attempt + 1)) + continue + fi + echo "[ERROR] AWF startup failed after $awf_bootstrap_retry_max attempts; capturing awf-squid logs" | tee -a /tmp/gh-aw/agent-stdio.log + docker logs awf-squid 2>&1 | tail -200 | sed 's/^/[awf-squid] /' | tee -a /tmp/gh-aw/agent-stdio.log || true + fi + rm -f "$awf_attempt_log" + exit "$awf_exit" + done env: AWF_REFLECT_ENABLED: 1 COPILOT_AGENT_RUNNER_TYPE: STANDALONE