diff --git a/.github/workflows/agent-persona-explorer.lock.yml b/.github/workflows/agent-persona-explorer.lock.yml index 9271d0b4bcf..879f5f24d82 100644 --- a/.github/workflows/agent-persona-explorer.lock.yml +++ b/.github/workflows/agent-persona-explorer.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"25fe5e7273b1406c47d515f58cbeaf0494139d2224ad1761e4821fc8c42d8d5f","body_hash":"a4f85a8089bdb5f739ae612b04615c1360dec7817e3350094f5cfbb882289349","strict":true,"agent_id":"copilot","agent_model":"gpt-5.4-mini","engine_versions":{"copilot":"1.0.60"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"629ed7e00fec91181d992879919c820de3edb7f7fcce47972b810dfd3c43c8ac","body_hash":"a4f85a8089bdb5f739ae612b04615c1360dec7817e3350094f5cfbb882289349","strict":true,"agent_id":"copilot","agent_model":"gpt-5.4-mini","engine_versions":{"copilot":"1.0.60"}} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-go","sha":"4a3601121dd01d1626a1e23e37211e3254c1c06c","version":"v6.4.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"docker/build-push-action","sha":"f9f3042f7e2789586610d6e8b85c8f03e5195baf","version":"v7.2.0"},{"repo":"docker/setup-buildx-action","sha":"d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5","version":"v4.1.0"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.65"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.23","digest":"sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.23@sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"node:lts-alpine","digest":"sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14","pinned_image":"node:lts-alpine@sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14"}]} # ___ _ _ # / _ \ | | (_) @@ -236,7 +236,7 @@ jobs: id: pick-experiment uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 env: - GH_AW_EXPERIMENT_SPEC: '{"sub_agent_strategy":{"variants":["per_scenario","batch"],"description":"Test whether batch scenario testing reduces token costs vs. per-scenario sub-agent calls","hypothesis":"H0: no change in effective_tokens or duration. H1: batch reduces tokens by ≥20% and duration by ≥15% without quality loss","metric":"effective_tokens","secondary_metrics":["run_duration_minutes","scenarios_tested","output_quality_score"],"guardrail_metrics":[{"name":"discussion_created","threshold":"==1"},{"name":"scenarios_analyzed","threshold":"\u003e=3"}],"min_samples":14,"weight":[50,50],"start_date":"2026-05-22","analysis_type":"t_test","tags":["cost_optimization","token_efficiency","sub_agents"]}}' + GH_AW_EXPERIMENT_SPEC: '{"sub_agent_strategy":{"variants":["per_scenario","batch"],"description":"Test whether batch scenario testing reduces token costs vs. per-scenario sub-agent calls","hypothesis":"H0: no change in effective_tokens or duration. H1: batch reduces tokens by ≥20% and duration by ≥15% without quality loss","metric":"effective_tokens","secondary_metrics":["run_duration_minutes","scenarios_tested","output_quality_score"],"guardrail_metrics":[{"name":"issue_created","threshold":"==1"},{"name":"scenarios_analyzed","threshold":"\u003e=3"}],"min_samples":14,"weight":[50,50],"start_date":"2026-05-22","analysis_type":"t_test","tags":["cost_optimization","token_efficiency","sub_agents"]}}' GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments with: @@ -413,7 +413,6 @@ jobs: permissions: actions: read contents: read - discussions: read issues: read pull-requests: read concurrency: diff --git a/.github/workflows/agent-persona-explorer.md b/.github/workflows/agent-persona-explorer.md index c17b1cca5a1..62900211192 100644 --- a/.github/workflows/agent-persona-explorer.md +++ b/.github/workflows/agent-persona-explorer.md @@ -11,7 +11,6 @@ permissions: actions: read issues: read pull-requests: read - discussions: read experiments: sub_agent_strategy: variants: [per_scenario, batch] @@ -20,7 +19,7 @@ experiments: metric: effective_tokens secondary_metrics: [run_duration_minutes, scenarios_tested, output_quality_score] guardrail_metrics: - - name: discussion_created + - name: issue_created threshold: "==1" - name: scenarios_analyzed threshold: ">=3"