From b6b765b82b27b82f909d8574cef2ec1d4dfd345a Mon Sep 17 00:00:00 2001 From: Landon Cox Date: Sat, 2 May 2026 11:03:33 -0700 Subject: [PATCH] fix: move smoke-gemini tests into agent container Previously, smoke-gemini ran all tests (curl connectivity, file write/read, gh pr list) in a host pre-step, then had the agent merely verify pre-computed results. This meant the tests validated host connectivity, not AWF sandbox connectivity. Now the agent performs all tests inside the sandbox (like smoke-claude), properly exercising the firewall's domain allowlist, bash tool, and MCP connectivity from within the container. Changes: - Remove Pre-compute smoke test data host pre-step - Move test requirements into agent prompt - Agent now runs curl, file write/read, and MCP calls itself - Keep post-step safe-output validation unchanged Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/smoke-gemini.lock.yml | 45 +++++------------- .github/workflows/smoke-gemini.md | 63 +++---------------------- 2 files changed, 19 insertions(+), 89 deletions(-) diff --git a/.github/workflows/smoke-gemini.lock.yml b/.github/workflows/smoke-gemini.lock.yml index 188ff55d4..c3097837d 100644 --- a/.github/workflows/smoke-gemini.lock.yml +++ b/.github/workflows/smoke-gemini.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"b9d646a8d961bebe9369aca26bf6f72cfbdbc473c01dd225f83760faa55092b5","compiler_version":"v0.71.1","strict":true,"agent_id":"gemini"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"67d8d9994f3da85966ed33106b7a4e12c606a0b224f99056fa512f46c0d9bdae","compiler_version":"v0.71.1","strict":true,"agent_id":"gemini"} # gh-aw-manifest: 
{"version":1,"secrets":["GEMINI_API_KEY","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"239aec45b78c8799417efdd5bc6d8cc036629ec1","version":"v0.71.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.0","digest":"sha256:9c2228324fb1f26f39dc9471612e530ae3efc3156dac05efb2e8d212878d454d","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.0@sha256:9c2228324fb1f26f39dc9471612e530ae3efc3156dac05efb2e8d212878d454d"},{"image":"ghcr.io/github/github-mcp-server:v1.0.2","digest":"sha256:26db03408086a99cf1916348dcc4f9614206658f9082a8060dc7c81ad787f4ba","pinned_image":"ghcr.io/github/github-mcp-server:v1.0.2@sha256:26db03408086a99cf1916348dcc4f9614206658f9082a8060dc7c81ad787f4ba"},{"image":"node:lts-alpine","digest":"sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f","pinned_image":"node:lts-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f"}]} # ___ _ _ # / _ \ | | (_) @@ -190,10 +190,6 @@ jobs: env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl - GH_AW_EXPR_2805DAC9: ${{ steps.smoke-data.outputs.SMOKE_FILE_PATH }} - GH_AW_EXPR_7EA93000: ${{ steps.smoke-data.outputs.SMOKE_HTTP_CODE }} - GH_AW_EXPR_ABDF8D58: ${{ steps.smoke-data.outputs.SMOKE_PR_DATA }} - GH_AW_EXPR_EC16C26C: ${{ steps.smoke-data.outputs.SMOKE_FILE_CONTENT }} GH_AW_GITHUB_ACTOR: ${{ github.actor }} 
GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} @@ -206,14 +202,14 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_cac2f6b6857f65a1_EOF' + cat << 'GH_AW_PROMPT_b32a5656e6d98956_EOF' - GH_AW_PROMPT_cac2f6b6857f65a1_EOF + GH_AW_PROMPT_b32a5656e6d98956_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_cac2f6b6857f65a1_EOF' + cat << 'GH_AW_PROMPT_b32a5656e6d98956_EOF' Tools: add_comment, add_labels, missing_tool, missing_data, noop @@ -245,22 +241,19 @@ jobs: {{/if}} - GH_AW_PROMPT_cac2f6b6857f65a1_EOF + GH_AW_PROMPT_b32a5656e6d98956_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_cac2f6b6857f65a1_EOF' + cat << 'GH_AW_PROMPT_b32a5656e6d98956_EOF' {{#runtime-import .github/workflows/smoke-gemini.md}} - GH_AW_PROMPT_cac2f6b6857f65a1_EOF + GH_AW_PROMPT_b32a5656e6d98956_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} - GH_AW_EXPR_EC16C26C: ${{ steps.smoke-data.outputs.SMOKE_FILE_CONTENT }} - GH_AW_EXPR_2805DAC9: ${{ steps.smoke-data.outputs.SMOKE_FILE_PATH }} - GH_AW_EXPR_7EA93000: ${{ steps.smoke-data.outputs.SMOKE_HTTP_CODE }} - GH_AW_EXPR_ABDF8D58: ${{ steps.smoke-data.outputs.SMOKE_PR_DATA }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -271,10 +264,6 @@ jobs: uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - 
GH_AW_EXPR_2805DAC9: ${{ steps.smoke-data.outputs.SMOKE_FILE_PATH }} - GH_AW_EXPR_7EA93000: ${{ steps.smoke-data.outputs.SMOKE_HTTP_CODE }} - GH_AW_EXPR_ABDF8D58: ${{ steps.smoke-data.outputs.SMOKE_PR_DATA }} - GH_AW_EXPR_EC16C26C: ${{ steps.smoke-data.outputs.SMOKE_FILE_CONTENT }} GH_AW_GITHUB_ACTOR: ${{ github.actor }} GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} @@ -294,10 +283,6 @@ jobs: return await substitutePlaceholders({ file: process.env.GH_AW_PROMPT, substitutions: { - GH_AW_EXPR_2805DAC9: process.env.GH_AW_EXPR_2805DAC9, - GH_AW_EXPR_7EA93000: process.env.GH_AW_EXPR_7EA93000, - GH_AW_EXPR_ABDF8D58: process.env.GH_AW_EXPR_ABDF8D58, - GH_AW_EXPR_EC16C26C: process.env.GH_AW_EXPR_EC16C26C, GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, @@ -380,12 +365,6 @@ jobs: run: bash "${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh" env: GH_TOKEN: ${{ github.token }} - - env: - GH_TOKEN: ${{ github.token }} - id: smoke-data - name: Pre-compute smoke test data - run: "echo \"::group::Fetching last 2 merged PRs\"\nPR_DATA=$(gh pr list --repo \"$GITHUB_REPOSITORY\" --state merged --limit 2 \\\n --json number,title,author,mergedAt \\\n --jq '.[] | \"PR #\\(.number): \\(.title) (by @\\(.author.login), merged \\(.mergedAt))\"')\necho \"$PR_DATA\"\necho \"::endgroup::\"\n\necho \"::group::GitHub.com connectivity check\"\nHTTP_CODE=$(curl -s -o /dev/null -w \"%{http_code}\" --max-time 10 https://github.com)\necho \"github.com returned HTTP $HTTP_CODE\"\necho \"::endgroup::\"\n\necho \"::group::File write/read test\"\nTEST_DIR=\"/tmp/gh-aw/agent\"\nTEST_FILE=\"$TEST_DIR/smoke-test-gemini-${GITHUB_RUN_ID}.txt\"\nmkdir -p \"$TEST_DIR\"\necho \"Smoke test passed for Gemini at $(date)\" > \"$TEST_FILE\"\nFILE_CONTENT=$(cat 
\"$TEST_FILE\")\necho \"Wrote and read back: $FILE_CONTENT\"\necho \"::endgroup::\"\n\n{\n echo \"SMOKE_PR_DATA<<SMOKE_EOF\"\n echo \"$PR_DATA\"\n echo \"SMOKE_EOF\"\n echo \"SMOKE_HTTP_CODE=$HTTP_CODE\"\n echo \"SMOKE_FILE_CONTENT=$FILE_CONTENT\"\n echo \"SMOKE_FILE_PATH=$TEST_FILE\"\n} >> \"$GITHUB_OUTPUT\"\n" - - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} @@ -448,9 +427,9 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_2fd9d3f49ca6122d_EOF' + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_c9f49abd21e2934c_EOF' {"add_comment":{"hide_older_comments":true,"max":1},"add_labels":{"allowed":["smoke-gemini"]},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} - GH_AW_SAFE_OUTPUTS_CONFIG_2fd9d3f49ca6122d_EOF + GH_AW_SAFE_OUTPUTS_CONFIG_c9f49abd21e2934c_EOF - name: Write Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | @@ -657,7 +636,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host --add-host host.docker.internal:127.0.0.1 --user '"${MCP_GATEWAY_UID}"':'"${MCP_GATEWAY_GID}"' --group-add '"${DOCKER_SOCK_GID}"' -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e
GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.3.0' GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node) - cat << GH_AW_MCP_CONFIG_c9062e0270891756_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs" + cat << GH_AW_MCP_CONFIG_eede5300048260c6_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs" { "mcpServers": { "github": { @@ -697,7 +676,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_c9062e0270891756_EOF + GH_AW_MCP_CONFIG_eede5300048260c6_EOF - name: Clean git credentials continue-on-error: true run: bash "${RUNNER_TEMP}/gh-aw/actions/clean_git_credentials.sh" diff --git a/.github/workflows/smoke-gemini.md b/.github/workflows/smoke-gemini.md index 8cb7a39f6..4501032ad 100644 --- a/.github/workflows/smoke-gemini.md +++ b/.github/workflows/smoke-gemini.md @@ -44,41 +44,6 @@ sandbox: agent: version: v0.25.29 strict: true -steps: - - name: Pre-compute smoke test data - id: smoke-data - run: | - echo "::group::Fetching last 2 merged PRs" - PR_DATA=$(gh pr list --repo "$GITHUB_REPOSITORY" --state merged --limit 2 \ - --json number,title,author,mergedAt \ - --jq '.[] | "PR #\(.number): \(.title) (by @\(.author.login), merged \(.mergedAt))"') - echo "$PR_DATA" - echo "::endgroup::" - - echo "::group::GitHub.com connectivity check" - HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 https://github.com) - echo "github.com returned HTTP $HTTP_CODE" - echo "::endgroup::" - - echo "::group::File write/read test" - TEST_DIR="/tmp/gh-aw/agent" - TEST_FILE="$TEST_DIR/smoke-test-gemini-${GITHUB_RUN_ID}.txt" - mkdir -p "$TEST_DIR" - echo "Smoke test passed for 
Gemini at $(date)" > "$TEST_FILE" - FILE_CONTENT=$(cat "$TEST_FILE") - echo "Wrote and read back: $FILE_CONTENT" - echo "::endgroup::" - - { - echo "SMOKE_PR_DATA<<SMOKE_EOF" - echo "$PR_DATA" - echo "SMOKE_EOF" - echo "SMOKE_HTTP_CODE=$HTTP_CODE" - echo "SMOKE_FILE_CONTENT=$FILE_CONTENT" - echo "SMOKE_FILE_PATH=$TEST_FILE" - } >> "$GITHUB_OUTPUT" - env: - GH_TOKEN: ${{ github.token }} post-steps: - name: Validate safe outputs were invoked run: | @@ -103,35 +68,21 @@ post-steps: **IMPORTANT: Keep all outputs extremely short and concise. Use single-line responses where possible. No verbose explanations.** -## Pre-Computed Test Results - -The following tests were already executed in a deterministic pre-agent step. Your job is to verify the results and produce the summary comment. - -### 1. GitHub MCP Testing -Verify MCP connectivity by calling `github-list_pull_requests` for ${{ github.repository }} (limit 1, state merged). Confirm the result matches the pre-fetched data below. - -### 2. GitHub.com Connectivity -Pre-step result: HTTP ${{ steps.smoke-data.outputs.SMOKE_HTTP_CODE }} from github.com. -✅ if HTTP 200 or 301, ❌ otherwise. - -### 3. File Write/Read Test -Pre-step wrote and read back: "${{ steps.smoke-data.outputs.SMOKE_FILE_CONTENT }}" -File path: ${{ steps.smoke-data.outputs.SMOKE_FILE_PATH }} -Verify by running `cat` on the file path using bash to confirm it exists. +> Use `perPage: 2` when listing PRs. -## Pre-Fetched PR Data +## Test Requirements -``` -${{ steps.smoke-data.outputs.SMOKE_PR_DATA }} -``` +1. **GitHub MCP Testing**: Review the last 2 merged pull requests in ${{ github.repository }} +2. **GitHub.com Connectivity**: Use bash to run `curl -s -o /dev/null -w "%{http_code}" --max-time 10 https://github.com` and verify the HTTP status is 200 or 301 +3. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-gemini-${{ github.run_id }}.txt` with content "Smoke test passed for Gemini at $(date)" (create the directory if it doesn't exist) +4.
**Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) ## Output -Add a **very brief** comment (max 5-10 lines) to the current pull request with: +**If triggered by a pull request**, add a **very brief** comment (max 5-10 lines) to the current pull request with: - PR titles only (no descriptions) - ✅ or ❌ for each test result - Overall status: PASS or FAIL -- Mention the pull request author and any assignees If all tests pass, add the label `smoke-gemini` to the pull request.