diff --git a/.github/workflows/daily-doc-healer.lock.yml b/.github/workflows/daily-doc-healer.lock.yml index 277e5670b46..8001e5d2621 100644 --- a/.github/workflows/daily-doc-healer.lock.yml +++ b/.github/workflows/daily-doc-healer.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"3d2e47dd11ebabd35323240be1f7ca259ebd0392cc4858fca4aa1d44399be955","body_hash":"17d342d9eb1eb81fe8ee6dc242c2e7dd96fbdcba013840231269807407639fa6","strict":true,"agent_id":"claude","agent_model":"${{ needs.activation.outputs.model_size }}","engine_versions":{"claude":"2.1.165"}} +# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"d3f6a69ca5be819b9d74f2f739698f14b229ff5c6249655b6dc8485d335175e5","body_hash":"17d342d9eb1eb81fe8ee6dc242c2e7dd96fbdcba013840231269807407639fa6","strict":true,"agent_id":"claude","agent_model":"${{ needs.activation.outputs.model_size }}","engine_versions":{"claude":"2.1.165"}} # gh-aw-manifest: {"version":1,"secrets":["ANTHROPIC_API_KEY","GH_AW_AGENT_TOKEN","GH_AW_CI_TRIGGER_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GH_AW_OTEL_GRAFANA_AUTHORIZATION","GH_AW_OTEL_GRAFANA_ENDPOINT","GH_AW_OTEL_SENTRY_AUTHORIZATION","GH_AW_OTEL_SENTRY_ENDPOINT","GITHUB_TOKEN"],"actions":[{"repo":"actions/cache/restore","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/cache/save","sha":"27d5ce7f107fe9357f9df03efb73ab90386fccae","version":"v5.0.5"},{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/cli-proxy:0.25.65"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.65"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.23","digest":"sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.23@sha256:0dd1bd91a41e24a3ccc31b1ec6cb61d36608997fabf91f2d643b64e3fc33180a"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"node:lts-alpine","digest":"sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14","pinned_image":"node:lts-alpine@sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14"}]} # ___ _ _ # / _ \ | | (_) @@ -238,7 +238,7 @@ jobs: id: pick-experiment uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 env: - GH_AW_EXPERIMENT_SPEC: '{"model_size":{"variants":["claude-sonnet-4-6","claude-haiku-4-5-20251001"],"description":"Tests whether Claude Haiku detects and corrects documentation gaps with equivalent quality at lower token cost versus Claude Sonnet.","hypothesis":"H0: no change in issue/PR creation rate or run success rate. H1: Claude Haiku reduces effective token usage \u003e=30% with equivalent run success rate (\u003e=0.90).","metric":"effective_tokens_total","secondary_metrics":["run_success_rate","run_duration_ms"],"guardrail_metrics":[{"name":"run_success_rate","threshold":"\u003e=0.90"},{"name":"empty_output_rate","threshold":"\u003c=0.10"}],"min_samples":20,"weight":[50,50],"start_date":"2026-06-04"}}' + GH_AW_EXPERIMENT_SPEC: '{"model_size":{"variants":["claude-sonnet-4.6","claude-haiku-4.5"],"description":"Tests whether Claude Haiku detects and corrects documentation gaps with equivalent quality at lower token cost versus Claude Sonnet.","hypothesis":"H0: no change in issue/PR creation rate or run success rate. H1: Claude Haiku reduces effective token usage \u003e=30% with equivalent run success rate (\u003e=0.90).","metric":"effective_tokens_total","secondary_metrics":["run_success_rate","run_duration_ms"],"guardrail_metrics":[{"name":"run_success_rate","threshold":"\u003e=0.90"},{"name":"empty_output_rate","threshold":"\u003c=0.10"}],"min_samples":20,"weight":[50,50],"start_date":"2026-06-04"}}' GH_AW_EXPERIMENT_STATE_FILE: /tmp/gh-aw/experiments/state.json GH_AW_EXPERIMENT_STATE_DIR: /tmp/gh-aw/experiments with: diff --git a/.github/workflows/daily-doc-healer.md b/.github/workflows/daily-doc-healer.md index dc93bb29ad2..f4dda3c093b 100644 --- a/.github/workflows/daily-doc-healer.md +++ b/.github/workflows/daily-doc-healer.md @@ -43,7 +43,7 @@ name: Daily Documentation Healer strict: true experiments: model_size: - variants: [claude-sonnet-4-6, claude-haiku-4-5-20251001] + variants: [claude-sonnet-4.6, claude-haiku-4.5] description: "Tests whether Claude Haiku detects and corrects documentation gaps with equivalent quality at lower token cost versus Claude Sonnet." hypothesis: "H0: no change in issue/PR creation rate or run success rate. H1: Claude Haiku reduces effective token usage >=30% with equivalent run success rate (>=0.90)." metric: effective_tokens_total