diff --git a/.github/workflows/smoke-pi.lock.yml b/.github/workflows/smoke-pi.lock.yml index 193a857b582..46d94495407 100644 --- a/.github/workflows/smoke-pi.lock.yml +++ b/.github/workflows/smoke-pi.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"76edb0c56e7dab4c1bb7014bf5cf7f4f7895c49fb63dc0e187903320f5221218","body_hash":"07ea62dc335d2b124062d5af97b2e7ea5f4ec4305c9829a2fd52b147247c8902","strict":true,"agent_id":"pi","agent_model":"copilot/gpt-5.4","engine_versions":{"pi":"0.75.4"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"c27dd49ba5a0550b86685eb3a79e7a15ecd1e4593628c2537d24422ea734428d","body_hash":"b7512ab2cce72ecfc286e40eeef3b439626c3a58871465c037547f21c2de5d1f","strict":true,"agent_id":"pi","agent_model":"copilot/gpt-5.4","engine_versions":{"pi":"0.75.4"}} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.65"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.23","digest":"sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.23@sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"node:lts-alpine","digest":"sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14","pinned_image":"node:lts-alpine@sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14"}]} # ___ _ _ # / _ \ | | (_) @@ -113,7 +113,6 @@ jobs: daily_effective_workflow_threshold: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_threshold || '' }} daily_effective_workflow_total_effective_tokens: ${{ steps.daily-effective-workflow-guardrail.outputs.daily_effective_workflow_total_effective_tokens || '' }} engine_id: ${{ steps.generate_aw_info.outputs.engine_id }} - experiments: ${{ steps.pick-experiment.outputs.experiments }} lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }} model: ${{ steps.generate_aw_info.outputs.model }} secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} @@ -122,7 +121,6 @@ jobs: setup-trace-id: ${{ steps.setup.outputs.trace-id }} slash_command: ${{ needs.pre_activation.outputs.matched_command }} stale_lock_file_failed: ${{ steps.check-lock-file.outputs.stale_lock_file_failed == 'true' }} - sub_agent_decomposition: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} text: ${{ steps.sanitized.outputs.text }} title: ${{ steps.sanitized.outputs.title }} steps: @@ -270,45 +268,10 @@ jobs: setupGlobals(core, github, context, exec, io, getOctokit); const { main } = require('${{ runner.temp }}/gh-aw/actions/add_workflow_run_comment.cjs'); await main(); - - name: Restore experiment state from git - id: restore-experiment-state - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json - GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments - GH_AW_EXPERIMENT_BRANCH: experiments/smokepi - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io, getOctokit); - const { main } = require('${{ runner.temp }}/gh-aw/actions/load_experiment_state_from_repo.cjs'); - await main(); - - name: Pick experiment variants - id: pick-experiment - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - GH_AW_EXPERIMENT_SPEC: '{"sub_agent_decomposition":{"variants":["single_agent","parallel_sub_agents"],"description":"Test whether decomposing smoke tests into parallel sub-agents reduces token cost","hypothesis":"H0: no change in effective token consumption. H1: parallel sub-agents reduce tokens by 15-25% by eliminating unnecessary context sharing","metric":"effective_token_count","secondary_metrics":["run_duration_seconds","test_pass_rate","false_failure_rate"],"guardrail_metrics":[{"name":"test_completion_rate","threshold":"\u003e=0.95"},{"name":"overall_pass_rate","threshold":"\u003e=0.80"}],"min_samples":20,"weight":[50,50],"start_date":"2026-05-22","analysis_type":"mann_whitney","tags":["cost_optimization","smoke_tests","pi_engine"]}}' - GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json - GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io, getOctokit); - const { main } = require('${{ runner.temp }}/gh-aw/actions/pick_experiment.cjs'); - await main(); - - name: Upload experiment artifact - if: always() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: smokepi-experiment - path: /tmp/gh-aw/experiments - if-no-files-found: ignore - retention-days: 30 - name: Create prompt with built-in context env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ runner.temp }}/gh-aw/safeoutputs/outputs.jsonl - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} GH_AW_EXPR_1A3A194A: ${{ github.event.discussion.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'discussion' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_463A214A: ${{ github.event.pull_request.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'pull_request' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_802A9F6A: ${{ github.event.issue.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'issue' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} @@ -389,7 +352,6 @@ jobs: GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} GH_AW_GITHUB_SERVER_URL: ${{ github.server_url }} - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -403,7 +365,6 @@ jobs: GH_AW_ALLOWED_EXTENSIONS: '' GH_AW_CACHE_DESCRIPTION: '' GH_AW_CACHE_DIR: '/tmp/gh-aw/cache-memory/' - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: ${{ steps.pick-experiment.outputs.sub_agent_decomposition }} GH_AW_EXPR_1A3A194A: ${{ github.event.discussion.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'discussion' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_463A214A: ${{ github.event.pull_request.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'pull_request' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} GH_AW_EXPR_802A9F6A: ${{ github.event.issue.number || (fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_type == 'issue' && fromJSON(github.event.inputs.aw_context || github.event.client_payload.aw_context || '{}').item_number) }} @@ -431,7 +392,6 @@ jobs: GH_AW_ALLOWED_EXTENSIONS: process.env.GH_AW_ALLOWED_EXTENSIONS, GH_AW_CACHE_DESCRIPTION: process.env.GH_AW_CACHE_DESCRIPTION, GH_AW_CACHE_DIR: process.env.GH_AW_CACHE_DIR, - GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION: process.env.GH_AW_EXPERIMENTS_SUB_AGENT_DECOMPOSITION, GH_AW_EXPR_1A3A194A: process.env.GH_AW_EXPR_1A3A194A, GH_AW_EXPR_463A214A: process.env.GH_AW_EXPR_463A214A, GH_AW_EXPR_802A9F6A: process.env.GH_AW_EXPR_802A9F6A, @@ -1151,7 +1111,6 @@ jobs: - activation - agent - detection - - push_experiments_state - safe_outputs - update_cache_memory if: > @@ -1379,12 +1338,6 @@ jobs: mkdir -p /tmp/gh-aw/ find "/tmp/gh-aw/" -type f -print echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" - - name: Download experiment artifact - continue-on-error: true - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: smokepi-experiment - path: /tmp/gh-aw/experiments/ - name: Checkout repository for patch context if: needs.agent.outputs.has_patch == 'true' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -1617,83 +1570,6 @@ jobs: const { main } = require('${{ runner.temp }}/gh-aw/actions/check_command_position.cjs'); await main(); - push_experiments_state: - needs: activation - if: always() && (!cancelled()) && needs.activation.result == 'success' - runs-on: ubuntu-slim - permissions: - contents: write - steps: - - name: Checkout actions folder - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions - persist-credentials: false - - name: Setup Scripts - id: setup - uses: ./actions/setup - with: - destination: ${{ runner.temp }}/gh-aw/actions - job-name: ${{ github.job }} - trace-id: ${{ needs.activation.outputs.setup-trace-id }} - parent-span-id: ${{ needs.activation.outputs.setup-parent-span-id || needs.activation.outputs.setup-span-id }} - env: - GH_AW_SETUP_WORKFLOW_NAME: "Smoke Pi" - GH_AW_CURRENT_WORKFLOW_REF: ${{ github.repository }}/.github/workflows/smoke-pi.lock.yml@${{ github.ref }} - GH_AW_INFO_VERSION: "0.75.4" - GH_AW_INFO_ENGINE_ID: "pi" - - name: Checkout repository - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - persist-credentials: false - sparse-checkout: . - - name: Configure Git credentials - env: - REPO_NAME: ${{ github.repository }} - SERVER_URL: ${{ github.server_url }} - GITHUB_TOKEN: ${{ github.token }} - run: | - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git config --global user.name "github-actions[bot]" - git config --global am.keepcr true - # Re-authenticate git with GitHub token - SERVER_URL_STRIPPED="${SERVER_URL#https://}" - git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" - echo "Git configured with standard GitHub Actions identity" - - name: Download experiment artifact - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - continue-on-error: true - with: - name: smokepi-experiment - path: /tmp/gh-aw/experiments - - name: Push experiment state to git - id: push_experiments_state - if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 - env: - GH_TOKEN: ${{ github.token }} - GITHUB_RUN_ID: ${{ github.run_id }} - GITHUB_SERVER_URL: ${{ github.server_url }} - GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments - GH_AW_EXPERIMENT_BRANCH: experiments/smokepi - with: - script: | - const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); - setupGlobals(core, github, context, exec, io, getOctokit); - const { main } = require('${{ runner.temp }}/gh-aw/actions/push_experiment_state.cjs'); - await main(); - - name: Restore actions folder - if: always() - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: github/gh-aw - sparse-checkout: | - actions/setup - sparse-checkout-cone-mode: true - persist-credentials: false - safe_outputs: needs: - activation diff --git a/.github/workflows/smoke-pi.md b/.github/workflows/smoke-pi.md index 34bf71c1ee8..be286adbb06 100644 --- a/.github/workflows/smoke-pi.md +++ b/.github/workflows/smoke-pi.md @@ -17,24 +17,6 @@ permissions: issues: read pull-requests: read name: Smoke Pi -experiments: - sub_agent_decomposition: - variants: [single_agent, parallel_sub_agents] - description: "Test whether decomposing smoke tests into parallel sub-agents reduces token cost" - hypothesis: "H0: no change in effective token consumption. H1: parallel sub-agents reduce tokens by 15-25% by eliminating unnecessary context sharing" - metric: effective_token_count - secondary_metrics: [run_duration_seconds, test_pass_rate, false_failure_rate] - guardrail_metrics: - - name: test_completion_rate - threshold: ">=0.95" - - name: overall_pass_rate - threshold: ">=0.80" - min_samples: 20 - weight: [50, 50] - start_date: "2026-05-22" - analysis_type: mann_whitney - tags: [cost_optimization, smoke_tests, pi_engine] - # issue: PLACEHOLDER_ISSUE_NUMBER engine: id: pi model: copilot/gpt-5.4 @@ -96,18 +78,6 @@ timeout-minutes: 10 ## Test Requirements -{{#if experiments.sub_agent_decomposition == 'parallel_sub_agents'}} -Launch five parallel `task` agents using mode: "background" to execute each smoke test independently. Use the `task` agent type with `description` field for each: - -1. **GitHub MCP Test Agent**: Fetch 2 merged PR titles from ${{ github.repository }} -2. **Web Fetch Test Agent**: Fetch https://github.com and verify "GitHub" in response using web-fetch MCP -3. **File I/O Test Agent**: Create `/tmp/gh-aw/agent/smoke-test-pi-${{ github.run_id }}.txt` with timestamp -4. **Bash Test Agent**: Verify file creation with `cat` command -5. **Build Test Agent**: Run `GOCACHE=/tmp/gh-aw/agent/go-cache GOMODCACHE=/tmp/gh-aw/agent/go-mod make build` - -Wait for all five agents to complete (you'll receive notifications). Read each agent's result using `read_agent`. Aggregate the results into a unified report with ✅/❌ status for each test. - -{{else}} Execute the following tests sequentially in a single turn: 1. **GitHub MCP Testing**: Use GitHub MCP tools to fetch details of exactly 2 merged pull requests from ${{ github.repository }} (title and number only) @@ -115,7 +85,6 @@ Execute the following tests sequentially in a single turn: 3. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-pi-${{ github.run_id }}.txt` with content "Smoke test passed for Pi at $(date)" (create the directory if it doesn't exist) 4. **Bash Tool Testing**: Execute bash commands to verify file creation was successful (use `cat` to read the file back) 5. **Build gh-aw**: Run `GOCACHE=/tmp/gh-aw/agent/go-cache GOMODCACHE=/tmp/gh-aw/agent/go-mod make build` to verify the agent can successfully build the gh-aw project. If the command fails, mark this test as ❌ and report the failure. -{{/if}} ## Output