From feb6e2249f08fb1211cfc4a5874dd64aa86d70b4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:07:20 +0000 Subject: [PATCH 1/2] Initial plan From dd7e10d5b58358561a4d81892ddcc7a5c328a5d0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 6 Jun 2026 16:21:39 +0000 Subject: [PATCH 2/2] fix: remove parallel_sub_agents experiment from smoke-pi workflow The sub_agent_decomposition experiment was selecting the parallel_sub_agents variant in ~50% of runs. This path asked the Pi engine to launch background task agents and wait for async notifications, but Pi runs in single-pass mode (--no-session) and cannot receive async notifications or use task/read_agent tools. The agent would exhaust tokens attempting this but never call any safe output tool, triggering the "No Safe Outputs Generated" failure. Remove the experiment and always use the sequential execution path. 
Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --- .github/workflows/smoke-pi.lock.yml | 126 +--------------------------- .github/workflows/smoke-pi.md | 31 ------- 2 files changed, 1 insertion(+), 156 deletions(-) diff --git a/.github/workflows/smoke-pi.lock.yml b/.github/workflows/smoke-pi.lock.yml index 193a857b582..46d94495407 100644 --- a/.github/workflows/smoke-pi.lock.yml +++ b/.github/workflows/smoke-pi.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"76edb0c56e7dab4c1bb7014bf5cf7f4f7895c49fb63dc0e187903320f5221218","body_hash":"07ea62dc335d2b124062d5af97b2e7ea5f4ec4305c9829a2fd52b147247c8902","strict":true,"agent_id":"pi","agent_model":"copilot/gpt-5.4","engine_versions":{"pi":"0.75.4"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"c27dd49ba5a0550b86685eb3a79e7a15ecd1e4593628c2537d24422ea734428d","body_hash":"b7512ab2cce72ecfc286e40eeef3b439626c3a58871465c037547f21c2de5d1f","strict":true,"agent_id":"pi","agent_model":"copilot/gpt-5.4","engine_versions":{"pi":"0.75.4"}} # gh-aw-manifest: 
{"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.65"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.23","digest":"sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.23@sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"node:lts-alpine","digest":"sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14","pinned_image":"node:lts-alpine@sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14"}]} # ___ _ _ # / _ \ | | (_) @@ -113,7 +113,6 @@ jobs: 
daily_effective_workflow_threshold: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_threshold || '' }} daily_effective_workflow_total_effective_tokens: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_total_effective_tokens || '' }} engine_id: ${{ steps.generate_aw_info.outputs.engine_id }} - experiments: ${{ steps.pick-experiment.outputs.experiments }} lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} model: ${{ steps.generate_aw_info.outputs.model }} secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} @@ -122,7 +121,6 @@ jobs: setup-trace-id: ${{ steps.setup.outputs.trace-id }} slash_command: ${{ needs.pre_activation.outputs.matched_command }} stale_lock_file_failed: ${{ steps.check-lock-file.outputs.stale_lock_file_failed == 'true' }} - sub_agent_decomposition: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} text: ${{ steps.sanitized.outputs.text }} title: ${{ steps.sanitized.outputs.title }} steps: @@ -270,45 +268,10 @@ jobs: setupGlobals(core, github, context, exec, io, getOctokit); const { main } = require('${{ runner.temp }}/gh-aw/actions/add_workflow_run_comment.cjs'); await main(); - - name: Restore experiment state from git - id: restore-experiment-state - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json - GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments - GH_AW_EXPERIMENT_BRANCH: experiments/smokepi - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io, getOctokit); - const { main } = require('${{ runner.temp }}/gh-aw/actions/load_experiment_state_from_repo.cjs'); - await main(); - - name: Pick experiment variants - id: pick-experiment - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # 
v9.0.0 - env: - GH_AW_EXPERIMENT_SPEC: '{"sub_agent_decomposition":{"variants":["single_agent","parallel_sub_agents"],"description":"Test whether decomposing smoke tests into parallel sub-agents reduces token cost","hypothesis":"H0: no change in effective token consumption. H1: parallel sub-agents reduce tokens by 15-25% by eliminating unnecessary context sharing","metric":"effective_token_count","secondary_metrics":["run_duration_seconds","test_pass_rate","false_failure_rate"],"guardrail_metrics":[{"name":"test_completion_rate","threshold":"\u003e=0.95"},{"name":"overall_pass_rate","threshold":"\u003e=0.80"}],"min_samples":20,"weight":[50,50],"start_date":"2026-05-22","analysis_type":"mann_whitney","tags":["cost_optimization","smoke_tests","pi_engine"]}}' - GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json - GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io, getOctokit); - const { main } = require('${{ runner.temp }}/gh-aw/actions/pick_experiment.cjs'); - await main(); - - name: Upload experiment artifact - if: always() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: smokepi-experiment - path: /tmp/gh-aw/experiments - if-no-files-found: ignore - retention-days: 30 - name: Create prompt with built-in context env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} GH_AW_EXPR_1A3A194A: ${{ github.event.discussion.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'discussion' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_463A214A: ${{ 
github.event.pull_request.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'pull_request' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_802A9F6A: ${{ github.event.issue.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'issue' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} @@ -389,7 +352,6 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_SERVER_URL: ${{ github.server_url }} - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -403,7 +365,6 @@ jobs: GH_AW_ALLOWED_EXTENSIONS: '' GH_AW_CACHE_DESCRIPTION: '' GH_AW_CACHE_DIR: '/tmp/gh-aw/cache-memory/' - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} GH_AW_EXPR_1A3A194A: ${{ github.event.discussion.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'discussion' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_463A214A: ${{ github.event.pull_request.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'pull_request' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_802A9F6A: ${{ github.event.issue.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'issue' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || 
'{}').item_number) }} @@ -431,7 +392,6 @@ jobs: GH_AW_ALLOWED_EXTENSIONS: process.env.GH_AW_ALLOWED_EXTENSIONS, GH_AW_CACHE_DESCRIPTION: process.env.GH_AW_CACHE_DESCRIPTION, GH_AW_CACHE_DIR: process.env.GH_AW_CACHE_DIR, - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: process.env.GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION, GH_AW_EXPR_1A3A194A: process.env.GH_AW_EXPR_1A3A194A, GH_AW_EXPR_463A214A: process.env.GH_AW_EXPR_463A214A, GH_AW_EXPR_802A9F6A: process.env.GH_AW_EXPR_802A9F6A, @@ -1151,7 +1111,6 @@ jobs: - activation - agent - detection - - push_experiments_state - safe_outputs - update_cache_memory if: > @@ -1379,12 +1338,6 @@ jobs: mkdir -p /tmp/gh-aw/ find "/tmp/gh-aw/" -type f -print echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" - - name: Download experiment artifact - continue-on-error: true - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: smokepi-experiment - path: /tmp/gh-aw/experiments/ - name: Checkout repository for patch context if: needs.agent.outputs.has_patch == 'true' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -1617,83 +1570,6 @@ jobs: const { main } = require('${{ runner.temp }}/gh-aw/actions/check_command_position.cjs'); await main(); - push_experiments_state: - needs: activation - if: always() && (!cancelled()) && needs.activation.result == 'success' - runs-on: ubuntu-slim - permissions: - contents: write - steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - - name: Setup Scripts - id: setup - uses: ./actions/setup - with: - destination: ${{ runner.temp }}/gh-aw/actions - job-name: ${{ github.job }} - trace-id: ${{ needs.activation.outputs.setup-trace-id }} - parent-span-id: ${{ needs.activation.outputs.setup-parent-span-id || needs.activation.outputs.setup-span-id }} - env: - 
GH_AW_SETUP_WORKFLOW_NAME: "Smoke Pi" - GH_AW_CURRENT_WORKFLOW_REF: ${{ github.repository }}/.github/workflows/smoke-pi.lock.yml@${{ github.ref }} - GH_AW_INFO_VERSION: "0.75.4" - GH_AW_INFO_ENGINE_ID: "pi" - - name: Checkout repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - sparse-checkout: . - - name: Configure Git credentials - env: - REPO_NAME: ${{ github.repository }} - SERVER_URL: ${{ github.server_url }} - GITHUB_TOKEN: ${{ github.token }} - run: | - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git config --global user.name "github-actions[bot]" - git config --global am.keepcr true - # Re-authenticate git with GitHub token - SERVER_URL_STRIPPED="${SERVER_URL#https://}" - git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" - echo "Git configured with standard GitHub Actions identity" - - name: Download experiment artifact - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - continue-on-error: true - with: - name: smokepi-experiment - path: /tmp/gh-aw/experiments - - name: Push experiment state to git - id: push_experiments_state - if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - GH_TOKEN: ${{ github.token }} - GITHUB_RUN_ID: ${{ github.run_id }} - GITHUB_SERVER_URL: ${{ github.server_url }} - GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments - GH_AW_EXPERIMENT_BRANCH: experiments/smokepi - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io, getOctokit); - const { main } = require('${{ runner.temp }}/gh-aw/actions/push_experiment_state.cjs'); - await main(); - - name: Restore actions folder - if: always() - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: 
github/gh-aw - sparse-checkout: | - actions/setup - sparse-checkout-cone-mode: true - persist-credentials: false - safe_outputs: needs: - activation diff --git a/.github/workflows/smoke-pi.md b/.github/workflows/smoke-pi.md index 34bf71c1ee8..be286adbb06 100644 --- a/.github/workflows/smoke-pi.md +++ b/.github/workflows/smoke-pi.md @@ -17,24 +17,6 @@ permissions: issues: read pull-requests: read name: Smoke Pi -experiments: - sub_agent_decomposition: - variants: [single_agent, parallel_sub_agents] - description: "Test whether decomposing smoke tests into parallel sub-agents reduces token cost" - hypothesis: "H0: no change in effective token consumption. H1: parallel sub-agents reduce tokens by 15-25% by eliminating unnecessary context sharing" - metric: effective_token_count - secondary_metrics: [run_duration_seconds, test_pass_rate, false_failure_rate] - guardrail_metrics: - - name: test_completion_rate - threshold: ">=0.95" - - name: overall_pass_rate - threshold: ">=0.80" - min_samples: 20 - weight: [50, 50] - start_date: "2026-05-22" - analysis_type: mann_whitney - tags: [cost_optimization, smoke_tests, pi_engine] - # issue: PLACEHOLDER_ISSUE_NUMBER engine: id: pi model: copilot/gpt-5.4 @@ -96,18 +78,6 @@ timeout-minutes: 10 ## Test Requirements -{{#if experiments.sub_agent_decomposition == 'parallel_sub_agents'}} -Launch five parallel `task` agents using mode: "background" to execute each smoke test independently. Use the `task` agent type with `description` field for each: - -1. **GitHub MCP Test Agent**: Fetch 2 merged PR titles from ${{ github.repository }} -2. **Web Fetch Test Agent**: Fetch https://github.com and verify "GitHub" in response using web-fetch MCP -3. **File I/O Test Agent**: Create `/tmp/gh-aw/agent/smoke-test-pi-${{ github.run_id }}.txt` with timestamp -4. **Bash Test Agent**: Verify file creation with `cat` command -5. 
**Build Test Agent**: Run `GOCACHE=/tmp/gh-aw/agent/go-cache GOMODCACHE=/tmp/gh-aw/agent/go-mod make build` - -Wait for all five agents to complete (you'll receive notifications). Read each agent's result using `read_agent`. Aggregate the results into a unified report with ✅/❌ status for each test. - -{{else}} Execute the following tests sequentially in a single turn: 1. **GitHub MCP Testing**: Use GitHub MCP tools to fetch details of exactly 2 merged pull requests from ${{ github.repository }} (title and number only) @@ -115,7 +85,6 @@ Execute the following tests sequentially in a single turn: 3. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-pi-${{ github.run_id }}.txt` with content "Smoke test passed for Pi at $(date)" (create the directory if it doesn't exist) 4. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) 5. **Build gh-aw**: Run `GOCACHE=/tmp/gh-aw/agent/go-cache GOMODCACHE=/tmp/gh-aw/agent/go-mod make build` to verify the agent can successfully build the gh-aw project. If the command fails, mark this test as ❌ and report the failure. -{{/if}} ## Output