From 7f7120ec31352497a095191430226f88dc20a580 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer Date: Wed, 10 Jun 2026 08:36:35 +0200 Subject: [PATCH 1/7] Add objective mapping constants and tests --- .github/objective-mapping.json | 68 +++ .../objective-impact-report.lock.yml | 48 +- .github/workflows/objective-impact-report.md | 92 ++- pkg/cli/README.md | 1 + pkg/cli/audit_report.go | 6 +- pkg/cli/outcome_domain_breakdown.go | 179 ++++++ pkg/cli/outcome_eval.go | 255 +++++++- pkg/cli/outcome_eval_test.go | 85 ++- pkg/cli/outcomes_command.go | 7 +- pkg/cli/outcomes_history.go | 316 ++++++++++ pkg/cli/outcomes_history_test.go | 138 +++++ pkg/github/label_objective_mapping.go | 223 +++++++ .../label_objective_mapping_constants.go | 139 +++++ pkg/github/label_objective_mapping_test.go | 319 ++++++++++ .../objective-mapping-portfolio-reporting.md | 574 ++++++++++++++++++ 15 files changed, 2374 insertions(+), 76 deletions(-) create mode 100644 .github/objective-mapping.json create mode 100644 pkg/cli/outcome_domain_breakdown.go create mode 100644 pkg/cli/outcomes_history.go create mode 100644 pkg/cli/outcomes_history_test.go create mode 100644 pkg/github/label_objective_mapping.go create mode 100644 pkg/github/label_objective_mapping_constants.go create mode 100644 pkg/github/label_objective_mapping_test.go create mode 100644 specs/objective-mapping-portfolio-reporting.md diff --git a/.github/objective-mapping.json b/.github/objective-mapping.json new file mode 100644 index 00000000000..3e37e710e8e --- /dev/null +++ b/.github/objective-mapping.json @@ -0,0 +1,68 @@ +{ + "_comment": "Objective mapping for GitHub labels -> numeric values. 
This repo now favors root initiative/campaign labels first, with process/component labels kept as lower-value fallbacks.", + "label_to_value": { + "critical": 100, + "p0": 100, + "z_campaign_security-alert-burndown": 95, + "agentic-campaign": 90, + "security": 85, + "automation": 35, + "observability": 70, + "testing": 65, + "test": 65, + "copilot-opt": 60, + "bug": 60, + "security-fix": 60, + "reliability": 55, + "safe-outputs": 55, + "high-priority": 50, + "tool-improvement": 50, + "workflow-optimization": 50, + "improvement": 45, + "workflow": 45, + "mcp": 45, + "cli": 40, + "actions": 40, + "engine": 40, + "automated-analysis": 35, + "dx": 35, + "developer-experience": 35, + "quick-win": 35, + "performance": 30, + "optimization": 30, + "lint-monster": 25, + "maintenance": 25, + "refactoring": 25, + "code-quality": 25, + "automated-fix": 25, + "telemetry": 20, + "enhancement": 20, + "documentation": 10, + "dependencies": 10, + "experiment": 5, + "question": 0, + "good first issue": 0, + "ai-generated": 0, + "ai-inspected": 0 + }, + "multi_label_logic": "max", + "priority_labels": [ + "critical", + "p0", + "z_campaign_security-alert-burndown", + "agentic-campaign", + "security", + "observability", + "testing", + "test", + "safe-outputs", + "tool-improvement", + "workflow-optimization", + "copilot-opt", + "security-fix", + "reliability", + "bug", + "high-priority", + "automation" + ] +} diff --git a/.github/workflows/objective-impact-report.lock.yml b/.github/workflows/objective-impact-report.lock.yml index 255085af3d0..53482873e38 100644 --- a/.github/workflows/objective-impact-report.lock.yml +++ b/.github/workflows/objective-impact-report.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v4","frontmatter_hash":"01042cb596225bba99024eabdb9c0470acaf12778986721a2c0fd5cfe35d6918","body_hash":"9376c85494af0d8772c701dbc5d180f1e065e60a56b74200be8eddaa3ac082bc","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} +# gh-aw-metadata: 
{"schema_version":"v4","frontmatter_hash":"d2bca470c27b25256be1071d07254e340589733d170b6c9e9f475369fdf2cf2a","body_hash":"a7ad7feeba080f3bfbe37fdb8d59feb9320d4e5efb9d38579f2ee66116e91c3b","strict":true,"agent_id":"copilot","engine_versions":{"copilot":"1.0.60"}} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"df4cb1c069e1874edd31b4311f1884172cec0e10","version":"v6.0.3"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9.0.0"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.68"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.68"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.68"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.25","digest":"sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.25@sha256:c10331ad17668ef89f38f5e356678788a40b0cd5fef96e8f92e1d9c1de47cbaa"},{"image":"ghcr.io/github/github-mcp-server:v1.1.2","digest":"sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c","pinned_image":"ghcr.io/github/github-mcp-server:v1.1.2@sha256:30197479d8036c7811892bc07e06f9a05c9ef3cdd79bc59f256d50647f95788c"},{"image":"node:lts-alpine","digest":"sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14","pinned_image":"node:lts-alpine@sha256:2bdb65ed1dab192432bc31c95f94155ca5ad7fc1392fb7eb7526ab682fa5bf14"}]} # ___ _ _ # / _ \ | | (_) @@ -206,20 +206,20 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_3b277dc443ecd9c9_EOF' 
+ cat << 'GH_AW_PROMPT_40dfd44b6bf07647_EOF' - GH_AW_PROMPT_3b277dc443ecd9c9_EOF + GH_AW_PROMPT_40dfd44b6bf07647_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_3b277dc443ecd9c9_EOF' + cat << 'GH_AW_PROMPT_40dfd44b6bf07647_EOF' - Tools: create_issue, missing_tool, missing_data, noop + Tools: create_issue, close_issue, missing_tool, missing_data, noop - GH_AW_PROMPT_3b277dc443ecd9c9_EOF + GH_AW_PROMPT_40dfd44b6bf07647_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/mcp_cli_tools_prompt.md" - cat << 'GH_AW_PROMPT_3b277dc443ecd9c9_EOF' + cat << 'GH_AW_PROMPT_40dfd44b6bf07647_EOF' The following GitHub context information is available for this workflow: {{#if github.actor}} @@ -248,12 +248,12 @@ jobs: {{/if}} - GH_AW_PROMPT_3b277dc443ecd9c9_EOF + GH_AW_PROMPT_40dfd44b6bf07647_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_3b277dc443ecd9c9_EOF' + cat << 'GH_AW_PROMPT_40dfd44b6bf07647_EOF' {{#runtime-import .github/workflows/objective-impact-report.md}} - GH_AW_PROMPT_3b277dc443ecd9c9_EOF + GH_AW_PROMPT_40dfd44b6bf07647_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 @@ -451,21 +451,39 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_225e6cf166622973_EOF' - {"create_issue":{"max":1},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} - GH_AW_SAFE_OUTPUTS_CONFIG_225e6cf166622973_EOF + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_a7adcaa5205f36a9_EOF' + 
{"close_issue":{"max":1,"required_title_prefix":"Impact Efficiency Report - ","target":"*"},"create_issue":{"max":1,"title_prefix":"Impact Efficiency Report - "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"true"},"report_incomplete":{}} + GH_AW_SAFE_OUTPUTS_CONFIG_a7adcaa5205f36a9_EOF - name: Generate Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | { "description_suffixes": { - "create_issue": " CONSTRAINTS: Maximum 1 issue(s) can be created." + "close_issue": " CONSTRAINTS: Maximum 1 issue(s) can be closed. Target: *. Only issues with title prefix \"Impact Efficiency Report - \" can be closed.", + "create_issue": " CONSTRAINTS: Maximum 1 issue(s) can be created. Title will be prefixed with \"Impact Efficiency Report - \"." }, "repo_params": {}, "dynamic_tools": [] } GH_AW_VALIDATION_JSON: | { + "close_issue": { + "defaultMax": 1, + "fields": { + "body": { + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "issue_number": { + "optionalPositiveInteger": true + }, + "repo": { + "type": "string", + "maxLength": 256 + } + } + }, "create_issue": { "defaultMax": 1, "fields": { @@ -1482,7 +1500,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: 
"api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_issue\":{\"max\":1},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"report_incomplete\":{}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"close_issue\":{\"max\":1,\"required_title_prefix\":\"Impact Efficiency Report - \",\"target\":\"*\"},\"create_issue\":{\"max\":1,\"title_prefix\":\"Impact Efficiency Report - \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"true\"},\"report_incomplete\":{}}" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/objective-impact-report.md b/.github/workflows/objective-impact-report.md index ac9fb438bd1..e0a0ee5bf35 100644 --- a/.github/workflows/objective-impact-report.md +++ b/.github/workflows/objective-impact-report.md @@ -6,7 +6,12 @@ on: permissions: issues: read safe-outputs: + close-issue: + 
required-title-prefix: "Impact Efficiency Report - " + target: "*" + max: 1 create-issue: + title-prefix: "Impact Efficiency Report - " max: 1 --- @@ -21,7 +26,7 @@ Use this model: ```text Outcome = recorded work item produced by a GitHub Agentic Workflow run Objective = issue/epic/work item linked to the outcome -Objective Value = value from planning metadata (priority, severity, milestone, project) +Objective Value = numeric value from the repository objective-mapping configuration applied to traced root labels Outcome Indicator = 1 for accepted/delivered outcomes, 0 otherwise Outcome Value = Outcome Indicator × Objective Value Impact Efficiency = Σ Outcome Value / AI Credits @@ -30,7 +35,10 @@ Impact Efficiency = Σ Outcome Value / AI Credits Treat an outcome as one recorded result item produced by a GitHub Agentic Workflow run (for example, a PR change, completed fix, or report action), which may later be accepted or not accepted. Use workflow run outputs/artifacts and linked GitHub objects (issues, PRs, comments, discussions) as the outcome source of truth. Treat AI Credits as total model-credit cost consumed by the workflow runs that produced the analyzed outcomes. -Retrieve AI Credits from workflow-run usage/billing data available to the run context, and use the same time window as outcomes. +When available, use deterministic precomputed run data that already includes each run's `aic` field. +Prefer existing gh-aw outputs that already surface `aic`, such as pre-downloaded `gh aw logs --json` data or audit/log artifacts derived from the same run summaries. +Only fall back to MCP or other live retrieval if deterministic precomputed AIC inputs are unavailable. +Use the same time window for AIC as for outcomes. Do not perform workflow attribution. Outcomes deliver value. @@ -38,6 +46,16 @@ Objectives provide context and importance. AI Credits provide cost. Do not use an LLM judge. +## AIC Source of Truth + +Resolve AI Credits in this order: + +1. 
Deterministic precomputed `gh aw logs --json` style workflow-run data with per-run `aic` +2. Pre-downloaded audit/log artifacts that already expose run-level `aic` +3. MCP or other live retrieval only as a documented fallback + +If a run's `aic` field is missing or null, treat it as `0` and count it as missing-cost data in the report. + ## Scope Analyze workflow outcomes and linked objectives from the last 180 days. @@ -46,46 +64,39 @@ Analyze workflow outcomes and linked objectives from the last 180 days. For each outcome, find the associated objective first, then compute `Objective Value`. -Use the first matching priority or severity signal from objective labels or fields as the base value: +Use the repository objective mapping as the source of truth: ```text -P0 / urgent / critical = 100 -P1 / high = 50 -P2 / medium = 20 -P3 / low = 5 -unknown = 1 +.github/objective-mapping.json +or OBJECTIVE_MAPPING_JSON when explicitly provided ``` -Recognize common label forms case-insensitively: +Treat labels on the traced root object as the input to the mapping. +The mapping is label-based and already defines both value and multi-label behavior. ```text -P0, priority:P0, priority/P0, severity:critical, critical, urgent -P1, priority:P1, priority/P1, severity:high, high -P2, priority:P2, priority/P2, severity:medium, medium -P3, priority:P3, priority/P3, severity:low, low +Objective Value = mapping.ComputeObjectiveValue(root_labels) +Objective Labels = mapping.GetObjectiveLabels(root_labels) ``` -Then apply planning context adjustments: +Do not invent fallback scoring rules such as milestone bonuses, project bonuses, or priority-to-points heuristics when the mapping file is present. ```text -Objective is assigned to a milestone = +10 -Objective is assigned to a project = +10 +Examples of mapped labels in this repository include campaign, security, observability, testing, automation, and other configured objective labels. 
``` -Cap `Objective Value` at 120 (`100 + 10 + 10` maximum from base plus planning adjustments). - -The cap prevents a small number of heavily tagged objectives from dominating the metric. - -All other labels are classification only. +If a traced root object has no labels that exist in the mapping, mark the outcome as `unmapped`. ## Outcome association rules -For each workflow outcome, associate one objective using this order: +For each workflow outcome, follow the implemented root-tracing behavior: -1. Explicit linked issue or work item reference in the outcome. -2. Issue linked through the related pull request. -3. Parent issue/epic if explicitly linked. -4. If no objective can be found, mark as `unmapped`, exclude it from `Σ Outcome Value`, and report it separately. +1. For pull-request outcomes, trace the PR to its linked closing issue and use that root issue's labels. +2. If PR root tracing fails, or for direct issue outcomes, use labels on the issue itself. +3. Record the traced root URL when one is found so the report preserves an audit trail. +4. If no mapped objective labels can be found, mark the outcome as `unmapped`, exclude it from `Σ Outcome Value`, and report it separately. + +Prefer precomputed outcome evaluation data when available. Do not re-derive a different mapping model inside the report. ## Computation @@ -105,13 +116,26 @@ Then compute: ```text Accepted Outcome Count = count(outcomes where Outcome Indicator = 1) Total Outcome Value = sum(Outcome Value) +AI Credits = sum(run.aic across analyzed runs) Impact Efficiency = Total Outcome Value / AI Credits (value units per AI Credit; undefined when AI Credits = 0) ``` If AI Credits is missing or zero, report that Impact Efficiency is not computable and explain whether credits data was unavailable or no credits were consumed in the analysis window. +If only some runs are missing `aic`, still compute the metric from the available values and explicitly report how many runs had missing cost data. 
## Report +Before creating the new report, search for an existing open issue titled: + +```text +Impact Efficiency Report - YYYY-MM-DD +``` + +If one already exists for today: + +1. Close that issue first with a brief comment explaining that it is being replaced by a freshly generated report for the same day. +2. Then create the new report issue. + Create one issue titled: ```text @@ -137,8 +161,8 @@ The report must include: ### Top objectives by delivered value -| Objective | Priority/Severity | Milestone | Project | Delivered Outcome Value | -|---|---|---|---|---:| +| Objective Label | Delivered Outcome Value | Attempted Outcome Value | Delivered Outcomes | Efficiency | +|---|---:|---:|---:|---:| ### Unmapped outcomes @@ -159,12 +183,14 @@ Explain which one better reflects meaningful delivered value relative to cost. Mention missing or weak links in: - outcome-to-objective association -- priority/severity metadata -- milestone/project metadata +- root tracing and linked-object coverage +- label mapping coverage in `.github/objective-mapping.json` - AI Credits availability -## Safe output +State whether AI Credits came from deterministic precomputed data or from a live fallback path. -Use only `create-issue`. +If AI Credits are unavailable, still produce the delivered-value analysis and clearly state that the cost-normalized Impact Efficiency metric could not be computed. + +## Safe output -If a report for today already exists, do nothing. +Use only `close-issue` and `create-issue`. diff --git a/pkg/cli/README.md b/pkg/cli/README.md index 40af6ab46d8..337f19bdf3b 100644 --- a/pkg/cli/README.md +++ b/pkg/cli/README.md @@ -61,6 +61,7 @@ All diagnostic output MUST go to `stderr` using `console` formatting helpers. 
St | `gh aw trial` | `NewTrialCommand` | Run trial workflow executions | | `gh aw deploy` | `NewDeployCommand` | Deploy agentic workflows to a target repository using a pull request | | `gh aw outcomes` | `NewOutcomesCommand` | Check what happened to a workflow run's safe outputs | +| `gh aw outcomes history` | `NewOutcomesHistorySubcommand` | Score recent closed issues and merged PRs against the objective mapping | | _No `gh aw deps` command_ | `deps_*.go` (internal utilities) | Dependency reporting/advisory helpers used by other commands | | `gh aw version` | `versionCmd` (main.go) | Show version information | | `gh aw completion` | `NewCompletionCommand` | Generate shell completion scripts | diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go index 417111767da..e1e07b1221b 100644 --- a/pkg/cli/audit_report.go +++ b/pkg/cli/audit_report.go @@ -11,6 +11,7 @@ import ( "time" "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/github" "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/sliceutil" "github.com/github/gh-aw/pkg/timeutil" @@ -431,9 +432,10 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage // Evaluate outcomes for created items if any exist if len(createdItems) > 0 { - outcomeReports := EvaluateOutcomes(createdItems, "") + mapping := github.LoadObjectiveMappingFromConfig() + outcomeReports := EvaluateOutcomes(createdItems, "", mapping) auditData.Outcomes = outcomeReports - outcomeSummary := ComputeOutcomeSummary(outcomeReports) + outcomeSummary := ComputeOutcomeSummary(outcomeReports, mapping) auditData.OutcomeSummary = &outcomeSummary } diff --git a/pkg/cli/outcome_domain_breakdown.go b/pkg/cli/outcome_domain_breakdown.go new file mode 100644 index 00000000000..ae6cad4d018 --- /dev/null +++ b/pkg/cli/outcome_domain_breakdown.go @@ -0,0 +1,179 @@ +package cli + +import ( + "fmt" + "sort" + "strings" + + "github.com/github/gh-aw/pkg/logger" +) + +var domainBreakdownLog = 
logger.New("cli:domain_breakdown") + +// DomainBreakdown provides metrics for a single label/domain from outcomes. +type DomainBreakdown struct { + Label string `json:"label" console:"header:Domain"` + Attempted int `json:"attempted" console:"header:Attempted"` + Accepted int `json:"accepted" console:"header:Accepted"` + Rejected int `json:"rejected" console:"header:Rejected"` + Pending int `json:"pending" console:"header:Pending"` + TotalObjectiveValue int `json:"total_objective_value" console:"header:Total Value"` + AcceptedObjectiveValue int `json:"accepted_objective_value" console:"header:Accepted Value"` + ObjectiveEfficiency float64 `json:"objective_efficiency,omitempty" console:"header:Efficiency"` + AcceptanceRate float64 `json:"acceptance_rate,omitempty" console:"header:Acceptance Rate"` +} + +// ComputeDomainBreakdowns aggregates outcome metrics by label/domain. +// Returns a slice of DomainBreakdown sorted by total_objective_value descending. +func ComputeDomainBreakdowns(reports []OutcomeReport) []DomainBreakdown { + if len(reports) == 0 { + return []DomainBreakdown{} + } + + // Map domain label → metrics + domains := make(map[string]*DomainBreakdown) + + for _, report := range reports { + // If outcome has objective labels, aggregate by each label + for _, label := range report.ObjectiveLabels { + normalizedLabel := strings.ToLower(strings.TrimSpace(label)) + if _, exists := domains[normalizedLabel]; !exists { + domains[normalizedLabel] = &DomainBreakdown{ + Label: label, + } + } + + domain := domains[normalizedLabel] + domain.Attempted++ + domain.TotalObjectiveValue += report.ObjectiveValue + + switch report.Result { + case OutcomeAccepted: + domain.Accepted++ + domain.AcceptedObjectiveValue += report.ObjectiveValue + case OutcomeRejected: + domain.Rejected++ + case OutcomePending: + domain.Pending++ + } + } + + // If outcome has NO objective labels, create "unmapped" entry + if len(report.ObjectiveLabels) == 0 && report.ObjectiveValue == 0 { + if _, 
exists := domains["unmapped"]; !exists { + domains["unmapped"] = &DomainBreakdown{ + Label: "unmapped", + } + } + domain := domains["unmapped"] + domain.Attempted++ + + switch report.Result { + case OutcomeAccepted: + domain.Accepted++ + case OutcomeRejected: + domain.Rejected++ + case OutcomePending: + domain.Pending++ + } + } + } + + // Compute efficiency metrics for each domain + result := make([]DomainBreakdown, 0, len(domains)) + for _, domain := range domains { + if domain.Attempted > 0 { + domain.AcceptanceRate = float64(domain.Accepted) / float64(domain.Attempted) + } + if domain.TotalObjectiveValue > 0 { + domain.ObjectiveEfficiency = float64(domain.AcceptedObjectiveValue) / float64(domain.TotalObjectiveValue) + } + result = append(result, *domain) + } + + // Sort by total_objective_value descending + sort.Slice(result, func(i, j int) bool { + if result[i].TotalObjectiveValue != result[j].TotalObjectiveValue { + return result[i].TotalObjectiveValue > result[j].TotalObjectiveValue + } + return result[i].Label < result[j].Label + }) + + domainBreakdownLog.Printf("Computed domain breakdowns: domains=%d, total_attempted=%d", len(result), countTotalAttempted(result)) + return result +} + +func countTotalAttempted(breakdowns []DomainBreakdown) int { + total := 0 + for _, d := range breakdowns { + total += d.Attempted + } + return total +} + +// DomainInsight provides a human-readable assessment of a domain's performance. +type DomainInsight struct { + Label string + Status string // "excellent", "good", "fair", "poor", "new" + Efficiency float64 + Message string + Suggestion string +} + +// AnalyzeDomainPerformance provides strategic insights about domain efficiency. 
+func AnalyzeDomainPerformance(breakdown DomainBreakdown) DomainInsight { + insight := DomainInsight{ + Label: breakdown.Label, + Efficiency: breakdown.ObjectiveEfficiency, + } + + if breakdown.Attempted == 0 { + insight.Status = "new" + insight.Message = "No outcomes yet" + return insight + } + + switch { + case breakdown.ObjectiveEfficiency >= 0.90: + insight.Status = "excellent" + insight.Message = fmt.Sprintf("✅ %s: %.0f%% efficiency | %d/%d outcomes accepted | %d value delivered", + breakdown.Label, + breakdown.ObjectiveEfficiency*100, + breakdown.Accepted, + breakdown.Attempted, + breakdown.AcceptedObjectiveValue) + insight.Suggestion = "Keep current strategy working well" + + case breakdown.ObjectiveEfficiency >= 0.75: + insight.Status = "good" + insight.Message = fmt.Sprintf("✅ %s: %.0f%% efficiency | %d/%d outcomes accepted | %d value delivered", + breakdown.Label, + breakdown.ObjectiveEfficiency*100, + breakdown.Accepted, + breakdown.Attempted, + breakdown.AcceptedObjectiveValue) + insight.Suggestion = "Good progress; monitor for regressions" + + case breakdown.ObjectiveEfficiency >= 0.50: + insight.Status = "fair" + insight.Message = fmt.Sprintf("⚠️ %s: %.0f%% efficiency | %d/%d outcomes accepted | %d value delivered", + breakdown.Label, + breakdown.ObjectiveEfficiency*100, + breakdown.Accepted, + breakdown.Attempted, + breakdown.AcceptedObjectiveValue) + insight.Suggestion = "Consider reviewing agent strategy or adding human review" + + default: + insight.Status = "poor" + insight.Message = fmt.Sprintf("🔴 %s: %.0f%% efficiency | %d/%d outcomes accepted | %d value delivered", + breakdown.Label, + breakdown.ObjectiveEfficiency*100, + breakdown.Accepted, + breakdown.Attempted, + breakdown.AcceptedObjectiveValue) + insight.Suggestion = "Major issues; investigate root cause or pause automation" + } + + return insight +} diff --git a/pkg/cli/outcome_eval.go b/pkg/cli/outcome_eval.go index 25d8b07cd0b..707d9f3e5e3 100644 --- a/pkg/cli/outcome_eval.go +++ 
b/pkg/cli/outcome_eval.go @@ -7,12 +7,16 @@ import ( "strings" "time" + "github.com/github/gh-aw/pkg/github" "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/workflow" ) var outcomeEvalLog = logger.New("cli:outcome_eval") +var objectiveMappingGHAPIGetArray = ghAPIGetArray +var objectiveMappingGHAPIGraphQL = ghAPIGraphQL + // OutcomeResult classifies what happened to a safe output after execution. type OutcomeResult string @@ -32,6 +36,7 @@ type OutcomeReport struct { Type string `json:"type" console:"header:Type"` ObjectURL string `json:"object_url,omitempty" console:"header:URL,omitempty"` ObjectNumber int `json:"object_number,omitempty" console:"header:#,omitempty"` + TracedRootURL string `json:"traced_root_url,omitempty" console:"-"` Repo string `json:"repo,omitempty" console:"header:Repo,omitempty"` Result OutcomeResult `json:"result" console:"header:Outcome"` Detail string `json:"detail,omitempty" console:"header:Detail,omitempty"` @@ -40,6 +45,8 @@ type OutcomeReport struct { HumanEdits int `json:"human_edits,omitempty" console:"header:Edits,omitempty"` HumanReviews int `json:"human_reviews,omitempty" console:"header:Reviews,omitempty"` ZeroTouch bool `json:"zero_touch,omitempty" console:"header:Zero-touch,omitempty"` + ObjectiveValue int `json:"objective_value,omitempty" console:"header:Obj Value,omitempty"` + ObjectiveLabels []string `json:"objective_labels,omitempty" console:"-"` CreatedAt string `json:"created_at" console:"-"` CheckedAt string `json:"checked_at" console:"-"` EvalError string `json:"eval_error,omitempty" console:"-"` @@ -47,23 +54,27 @@ type OutcomeReport struct { // OutcomeSummary aggregates outcomes across multiple safe output items. 
type OutcomeSummary struct { - Total int `json:"total" console:"header:Total"` - Accepted int `json:"accepted" console:"header:Accepted"` - Rejected int `json:"rejected" console:"header:Rejected"` - Ignored int `json:"ignored" console:"header:Ignored"` - Pending int `json:"pending" console:"header:Pending"` - AcceptedStrong int `json:"accepted_strong,omitempty"` - AcceptedMedium int `json:"accepted_medium,omitempty"` - AcceptedWeak int `json:"accepted_weak,omitempty"` - FallbackExistsOnlyCount int `json:"fallback_exists_only_count,omitempty"` - Lifecycle int `json:"lifecycle" console:"header:Lifecycle"` - Errors int `json:"errors" console:"header:Errors"` - ZeroTouch int `json:"zero_touch" console:"header:Zero-touch"` - AcceptanceRate float64 `json:"acceptance_rate" console:"header:Acceptance Rate"` - WasteRate float64 `json:"waste_rate" console:"header:Waste Rate"` - ZeroTouchRate float64 `json:"zero_touch_rate" console:"header:Zero-touch Rate"` - MedianTimeToOutcome float64 `json:"median_time_to_outcome_hours,omitempty"` - CostPerAcceptedOutcome float64 `json:"cost_per_accepted_outcome,omitempty"` + Total int `json:"total" console:"header:Total"` + Accepted int `json:"accepted" console:"header:Accepted"` + Rejected int `json:"rejected" console:"header:Rejected"` + Ignored int `json:"ignored" console:"header:Ignored"` + Pending int `json:"pending" console:"header:Pending"` + AcceptedStrong int `json:"accepted_strong,omitempty"` + AcceptedMedium int `json:"accepted_medium,omitempty"` + AcceptedWeak int `json:"accepted_weak,omitempty"` + FallbackExistsOnlyCount int `json:"fallback_exists_only_count,omitempty"` + Lifecycle int `json:"lifecycle" console:"header:Lifecycle"` + Errors int `json:"errors" console:"header:Errors"` + ZeroTouch int `json:"zero_touch" console:"header:Zero-touch"` + AcceptanceRate float64 `json:"acceptance_rate" console:"header:Acceptance Rate"` + WasteRate float64 `json:"waste_rate" console:"header:Waste Rate"` + ZeroTouchRate float64 
`json:"zero_touch_rate" console:"header:Zero-touch Rate"` + MedianTimeToOutcome float64 `json:"median_time_to_outcome_hours,omitempty"` + CostPerAcceptedOutcome float64 `json:"cost_per_accepted_outcome,omitempty"` + TotalObjectiveValue int `json:"total_objective_value,omitempty" console:"header:Total Obj Value"` + AcceptedObjectiveValue int `json:"accepted_objective_value,omitempty" console:"header:Accepted Obj Value"` + ObjectiveEfficiency float64 `json:"objective_efficiency,omitempty" console:"header:Obj Efficiency"` + DomainBreakdowns []DomainBreakdown `json:"domain_breakdowns,omitempty" console:"-"` } // outcomeEvaluator is a function that evaluates one safe output item. @@ -93,7 +104,8 @@ var outcomeEvaluators = map[string]outcomeEvaluator{ } // EvaluateOutcomes checks the current state of all safe output items from a run. -func EvaluateOutcomes(items []CreatedItemReport, repoOverride string) []OutcomeReport { +// The mapping parameter is required and defines how labels map to objective values. +func EvaluateOutcomes(items []CreatedItemReport, repoOverride string, mapping *github.ObjectiveMapping) []OutcomeReport { outcomeEvalLog.Printf("Evaluating outcomes: items=%d, repo_override=%q", len(items), repoOverride) if repoOverride == "" { slug, err := GetCurrentRepoSlug() @@ -123,6 +135,10 @@ func EvaluateOutcomes(items []CreatedItemReport, repoOverride string) []OutcomeR report.CreatedAt = item.Timestamp report.CheckedAt = time.Now().UTC().Format(time.RFC3339) report.OutcomeEvaluation = normalizeOutcomeEvaluation(report) + + // Compute objective value from issue/PR labels + enrichOutcomeWithObjectiveValue(&report, repo, mapping) + reports = append(reports, report) } outcomeEvalLog.Printf("Outcome evaluation complete: reports=%d, skipped=%d", len(reports), skipped) @@ -130,7 +146,8 @@ func EvaluateOutcomes(items []CreatedItemReport, repoOverride string) []OutcomeR } // ComputeOutcomeSummary aggregates outcome reports into a summary. 
-func ComputeOutcomeSummary(reports []OutcomeReport) OutcomeSummary { +// The mapping parameter is required and defines how labels map to objective values. +func ComputeOutcomeSummary(reports []OutcomeReport, mapping *github.ObjectiveMapping) OutcomeSummary { s := OutcomeSummary{Total: len(reports)} var times []float64 for _, r := range reports { @@ -168,6 +185,12 @@ func ComputeOutcomeSummary(reports []OutcomeReport) OutcomeSummary { if r.TimeToOutcomeHours > 0 { times = append(times, r.TimeToOutcomeHours) } + + // Aggregate objective values + s.TotalObjectiveValue += r.ObjectiveValue + if eval.OutcomeStatus == OutcomeStatusAccepted { + s.AcceptedObjectiveValue += r.ObjectiveValue + } } resolved := s.Accepted + s.Rejected if resolved > 0 { @@ -182,6 +205,15 @@ func ComputeOutcomeSummary(reports []OutcomeReport) OutcomeSummary { if len(times) > 0 { s.MedianTimeToOutcome = medianFloat(times) } + + // Compute objective efficiency + if s.TotalObjectiveValue > 0 { + s.ObjectiveEfficiency = float64(s.AcceptedObjectiveValue) / float64(s.TotalObjectiveValue) + } + + // Compute domain breakdowns + s.DomainBreakdowns = ComputeDomainBreakdowns(reports) + return s } @@ -241,6 +273,27 @@ func ghAPIGetArray(endpoint string, repo string) ([]map[string]any, error) { return result, nil } +// ghAPIGraphQL calls the GitHub GraphQL API via gh cli and returns the parsed JSON. +func ghAPIGraphQL(query string, repo string) (map[string]any, error) { + ownerRepo, host := normalizeRepoForAPI(repo) + args := []string{"api", "graphql", "-f", "query=" + query} + var output []byte + var err error + if host != "" { + output, err = workflow.RunGHWithHost("Checking outcome...", host, args...) + } else { + output, err = workflow.RunGH("Checking outcome...", args...) 
+ } + if err != nil { + return nil, fmt.Errorf("gh api graphql: %w", err) + } + var result map[string]any + if err := json.Unmarshal(output, &result); err != nil { + return nil, fmt.Errorf("parsing graphql response for %s: %w", ownerRepo, err) + } + return result, nil +} + // timeBetween computes hours between two ISO timestamps. func timeBetween(from, to string) float64 { t1, err1 := time.Parse(time.RFC3339, from) @@ -328,3 +381,167 @@ func resolveItemNumber(item CreatedItemReport) int { } return 0 } + +// enrichOutcomeWithObjectiveValue computes the objective value for an outcome by fetching +// its associated issue/PR labels and applying the label-to-value mapping. +func enrichOutcomeWithObjectiveValue(report *OutcomeReport, repo string, mapping *github.ObjectiveMapping) { + if report == nil || mapping == nil { + return + } + + // Only compute objective values for items that have a GitHub object number + num := report.ObjectNumber + if num == 0 || repo == "" { + return + } + + // Skip types that don't have associated labels on issues/PRs + if report.Type == "noop" || report.Type == "missing_tool" || report.Type == "missing_data" || report.Type == "report_incomplete" { + return + } + + outcomeEvalLog.Printf("Computing objective value: type=%s, repo=%s, number=%d", report.Type, repo, num) + + root, err := traceOutcomeRoot(*report, repo) + if err != nil { + outcomeEvalLog.Printf("Could not trace root for objective value computation: %v", err) + return + } + report.TracedRootURL = root.URL + + labelNames := root.Labels + if len(labelNames) > 0 { + outcomeEvalLog.Printf("Fetched root labels for %s#%d: root=%s labels=%v", repo, num, root.URL, labelNames) + } + + // Compute objective value + objectiveValue := mapping.ComputeObjectiveValue(labelNames) + objectiveLabels := mapping.GetObjectiveLabels(labelNames) + + report.ObjectiveValue = objectiveValue + report.ObjectiveLabels = objectiveLabels + outcomeEvalLog.Printf("Computed objective value for %s#%d: value=%d, 
labels=%v", repo, num, objectiveValue, objectiveLabels) +} + +type tracedOutcomeRoot struct { + URL string + Number int + Labels []string +} + +func traceOutcomeRoot(report OutcomeReport, repo string) (tracedOutcomeRoot, error) { + if isPullRequestOutcomeType(report.Type) { + root, err := tracePullRequestRoot(report.ObjectNumber, repo) + if err == nil && root.Number > 0 { + return root, nil + } + if err != nil { + outcomeEvalLog.Printf("Falling back to direct labels after PR root trace failure: %v", err) + } + } + + labels, err := objectiveMappingGHAPIGetArray(fmt.Sprintf("issues/%d/labels", report.ObjectNumber), repo) + if err != nil { + return tracedOutcomeRoot{}, err + } + return tracedOutcomeRoot{ + URL: report.ObjectURL, + Number: report.ObjectNumber, + Labels: labelsToStringsFromMaps(labels), + }, nil +} + +func isPullRequestOutcomeType(outcomeType string) bool { + switch outcomeType { + case "create_pull_request", "update_pull_request", "create_pull_request_review_comment", + "resolve_pull_request_review_thread", "mark_pull_request_as_ready_for_review", + "push_to_pull_request_branch", "add_reviewer", "submit_pull_request_review": + return true + default: + return false + } +} + +func tracePullRequestRoot(prNumber int, repo string) (tracedOutcomeRoot, error) { + ownerRepo, _ := normalizeRepoForAPI(repo) + owner, name, found := strings.Cut(ownerRepo, "/") + if !found || owner == "" || name == "" { + return tracedOutcomeRoot{}, fmt.Errorf("invalid repo for root tracing: %s", repo) + } + + query := fmt.Sprintf(`query { + repository(owner: "%s", name: "%s") { + pullRequest(number: %d) { + closingIssuesReferences(first: 10) { + nodes { + number + url + labels(first: 20) { + nodes { name } + } + } + } + } + } + }`, + escapeGraphQLString(owner), + escapeGraphQLString(name), + prNumber, + ) + + result, err := objectiveMappingGHAPIGraphQL(query, repo) + if err != nil { + return tracedOutcomeRoot{}, err + } + data, _ := result["data"].(map[string]any) + repository, _ 
:= data["repository"].(map[string]any) + pullRequest, _ := repository["pullRequest"].(map[string]any) + closingRefs, _ := pullRequest["closingIssuesReferences"].(map[string]any) + nodes, _ := closingRefs["nodes"].([]any) + if len(nodes) == 0 { + return tracedOutcomeRoot{}, fmt.Errorf("no closing issues found for PR #%d", prNumber) + } + firstNode, _ := nodes[0].(map[string]any) + root := tracedOutcomeRoot{} + if url, ok := firstNode["url"].(string); ok { + root.URL = url + } + if number, ok := firstNode["number"].(float64); ok { + root.Number = int(number) + } + if labels, ok := firstNode["labels"].(map[string]any); ok { + if labelNodes, ok := labels["nodes"].([]any); ok { + root.Labels = labelsToStringsFromNodes(labelNodes) + } + } + return root, nil +} + +func labelsToStringsFromNodes(nodes []any) []string { + if len(nodes) == 0 { + return []string{} + } + result := make([]string, 0, len(nodes)) + for _, node := range nodes { + labelMap, _ := node.(map[string]any) + if name, ok := labelMap["name"].(string); ok { + result = append(result, name) + } + } + return result +} + +// labelsToStringsFromMaps converts GitHub API label map objects to string slice. 
+func labelsToStringsFromMaps(labels []map[string]any) []string { + if len(labels) == 0 { + return []string{} + } + + result := make([]string, 0, len(labels)) + for _, labelMap := range labels { + if name, ok := labelMap["name"].(string); ok { + result = append(result, name) + } + } + return result +} diff --git a/pkg/cli/outcome_eval_test.go b/pkg/cli/outcome_eval_test.go index f179b8e4d4e..9194116f8a2 100644 --- a/pkg/cli/outcome_eval_test.go +++ b/pkg/cli/outcome_eval_test.go @@ -5,10 +5,12 @@ package cli import ( "bytes" "encoding/json" + "fmt" "os" "path/filepath" "testing" + "github.com/github/gh-aw/pkg/github" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -23,7 +25,7 @@ func TestComputeOutcomeSummary(t *testing.T) { {Type: "close_issue", Result: OutcomeLifecycle}, } - s := ComputeOutcomeSummary(reports) + s := ComputeOutcomeSummary(reports, github.DefaultObjectiveMapping()) assert.Equal(t, 6, s.Total, "total should count all reports") assert.Equal(t, 2, s.Accepted, "accepted count") @@ -50,7 +52,7 @@ func TestComputeOutcomeSummary(t *testing.T) { } func TestComputeOutcomeSummaryEmpty(t *testing.T) { - s := ComputeOutcomeSummary(nil) + s := ComputeOutcomeSummary(nil, github.DefaultObjectiveMapping()) assert.Equal(t, 0, s.Total, "empty total") assert.InDelta(t, 0.0, s.AcceptanceRate, 1e-12, "empty acceptance rate") @@ -190,7 +192,7 @@ func TestEvaluateOutcomesSkipsNoopAndMetadata(t *testing.T) { {Type: "report_incomplete", Timestamp: "2026-05-12T00:00:00Z"}, } - reports := EvaluateOutcomes(items, "owner/repo") + reports := EvaluateOutcomes(items, "owner/repo", github.DefaultObjectiveMapping()) assert.Empty(t, reports, "noop and metadata types should be skipped") } @@ -199,11 +201,84 @@ func TestEvaluateOutcomesErrorOnMissingData(t *testing.T) { {Type: "create_pull_request", Timestamp: "2026-05-12T00:00:00Z"}, } - reports := EvaluateOutcomes(items, "") + reports := EvaluateOutcomes(items, "", github.DefaultObjectiveMapping()) 
assert.Len(t, reports, 1, "should produce one report") assert.Equal(t, OutcomeError, reports[0].Result, "should error on missing repo and number") } +func TestEnrichOutcomeWithObjectiveValue_TracesPullRequestToRootIssue(t *testing.T) { + oldGraphQL := objectiveMappingGHAPIGraphQL + oldGetArray := objectiveMappingGHAPIGetArray + t.Cleanup(func() { + objectiveMappingGHAPIGraphQL = oldGraphQL + objectiveMappingGHAPIGetArray = oldGetArray + }) + + objectiveMappingGHAPIGraphQL = func(query string, repo string) (map[string]any, error) { + return map[string]any{ + "data": map[string]any{ + "repository": map[string]any{ + "pullRequest": map[string]any{ + "closingIssuesReferences": map[string]any{ + "nodes": []any{ + map[string]any{ + "number": float64(1234), + "url": "https://github.com/owner/repo/issues/1234", + "labels": map[string]any{"nodes": []any{ + map[string]any{"name": "agentic-campaign"}, + map[string]any{"name": "security"}, + }}, + }, + }, + }, + }, + }, + }, + }, nil + } + objectiveMappingGHAPIGetArray = func(endpoint string, repo string) ([]map[string]any, error) { + return nil, fmt.Errorf("unexpected fallback label fetch: %s", endpoint) + } + + report := OutcomeReport{Type: "create_pull_request", ObjectURL: "https://github.com/owner/repo/pull/77", ObjectNumber: 77} + mapping := &github.ObjectiveMapping{ + LabelToValue: map[string]int{"agentic-campaign": 90, "security": 85}, + MultiLabelLogic: "max", + PriorityLabels: []string{"agentic-campaign", "security"}, + } + + enrichOutcomeWithObjectiveValue(&report, "owner/repo", mapping) + + assert.Equal(t, 90, report.ObjectiveValue) + assert.Equal(t, []string{"agentic-campaign", "security"}, report.ObjectiveLabels) + assert.Equal(t, "https://github.com/owner/repo/issues/1234", report.TracedRootURL) +} + +func TestEnrichOutcomeWithObjectiveValue_FallsBackToDirectLabels(t *testing.T) { + oldGraphQL := objectiveMappingGHAPIGraphQL + oldGetArray := objectiveMappingGHAPIGetArray + t.Cleanup(func() { + 
objectiveMappingGHAPIGraphQL = oldGraphQL + objectiveMappingGHAPIGetArray = oldGetArray + }) + + objectiveMappingGHAPIGraphQL = func(query string, repo string) (map[string]any, error) { + return nil, fmt.Errorf("no linked issues") + } + objectiveMappingGHAPIGetArray = func(endpoint string, repo string) ([]map[string]any, error) { + return []map[string]any{{"name": "automation"}, {"name": "testing"}}, nil + } + + report := OutcomeReport{Type: "create_issue", ObjectURL: "https://github.com/owner/repo/issues/42", ObjectNumber: 42} + mapping := &github.ObjectiveMapping{LabelToValue: map[string]int{"automation": 70, "testing": 65}, MultiLabelLogic: "max"} + + enrichOutcomeWithObjectiveValue(&report, "owner/repo", mapping) + + assert.Equal(t, 70, report.ObjectiveValue) + assert.Equal(t, []string{"automation", "testing"}, report.ObjectiveLabels) + assert.Equal(t, "https://github.com/owner/repo/issues/42", report.TracedRootURL) +} + func TestNormalizeOutcomeEvaluationTargetExistsOnly(t *testing.T) { report := OutcomeReport{ Type: "add_labels", @@ -259,7 +334,7 @@ func TestOutcomeSummaryExcludesExistsOnlyFromAccepted(t *testing.T) { }, } - s := ComputeOutcomeSummary(reports) + s := ComputeOutcomeSummary(reports, github.DefaultObjectiveMapping()) assert.Equal(t, 1, s.Accepted) assert.Equal(t, 1, s.AcceptedStrong) assert.Equal(t, 0, s.AcceptedWeak) diff --git a/pkg/cli/outcomes_command.go b/pkg/cli/outcomes_command.go index 191af86741c..6b4e21c95f3 100644 --- a/pkg/cli/outcomes_command.go +++ b/pkg/cli/outcomes_command.go @@ -11,6 +11,7 @@ import ( "github.com/github/gh-aw/pkg/console" "github.com/github/gh-aw/pkg/constants" + "github.com/github/gh-aw/pkg/github" "github.com/github/gh-aw/pkg/logger" "github.com/spf13/cobra" ) @@ -63,6 +64,7 @@ Examples: addRepoFlag(cmd) addOutputFlag(cmd, "") cmd.Flags().String("outcomes-dir", "", "Write outcome JSONL to this directory for OTLP export") + cmd.AddCommand(NewOutcomesHistorySubcommand()) return cmd } @@ -165,8 +167,9 @@ func 
RunOutcomes(config OutcomesConfig) error { } // Run the evaluations - reports := EvaluateOutcomes(items, repo) - outcomeSummary := ComputeOutcomeSummary(reports) + mapping := github.LoadObjectiveMappingFromConfig() + reports := EvaluateOutcomes(items, repo, mapping) + outcomeSummary := ComputeOutcomeSummary(reports, mapping) // Write outcome JSONL if requested (for OTLP export or downstream processing). // The --outcomes-dir flag takes precedence over the GH_AW_OUTCOMES_DIR env var. diff --git a/pkg/cli/outcomes_history.go b/pkg/cli/outcomes_history.go new file mode 100644 index 00000000000..04812edf7a0 --- /dev/null +++ b/pkg/cli/outcomes_history.go @@ -0,0 +1,316 @@ +package cli + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "strings" + + "github.com/github/gh-aw/pkg/constants" + ghmapping "github.com/github/gh-aw/pkg/github" + "github.com/github/gh-aw/pkg/workflow" + "github.com/spf13/cobra" +) + +const ( + historySourceIssues = "issues" + historySourcePRs = "prs" + historySourceAll = "all" +) + +var outcomesHistoryRunGH = workflow.RunGH + +type OutcomesHistoryConfig struct { + RepoOverride string + JSONOutput bool + Limit int + Source string +} + +type historicalObjectiveItem struct { + Kind string `json:"kind"` + Number int `json:"number"` + Title string `json:"title"` + URL string `json:"url"` + ClosedAt string `json:"closed_at,omitempty"` + MergedAt string `json:"merged_at,omitempty"` + ObjectiveLabels []string `json:"objective_labels"` + ObjectiveValue int `json:"objective_value"` +} + +type historicalObjectiveBucket struct { + Label string `json:"label"` + Count int `json:"count"` + MappedValue int `json:"mapped_value"` + ContributedValue int `json:"contributed_value"` +} + +type historicalObjectiveReport struct { + Source string `json:"source"` + SampleSize int `json:"sample_size"` + ScoredItems int `json:"scored_items"` + TotalObjectiveValue int `json:"total_objective_value"` + ObjectiveBuckets []historicalObjectiveBucket 
`json:"objective_buckets"` + RepresentativeItems []historicalObjectiveItem `json:"representative_items"` +} + +type historicalObjectivesData struct { + Repo string `json:"repo"` + Limit int `json:"limit"` + Issues *historicalObjectiveReport `json:"issues,omitempty"` + PRs *historicalObjectiveReport `json:"prs,omitempty"` +} + +type historicalGitHubItem struct { + Number int `json:"number"` + Title string `json:"title"` + URL string `json:"url"` + ClosedAt string `json:"closedAt,omitempty"` + MergedAt string `json:"mergedAt,omitempty"` + Labels []struct { + Name string `json:"name"` + } `json:"labels"` +} + +func NewOutcomesHistorySubcommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "history", + Short: "Score recent issues and merged PRs against the objective mapping", + Long: `Score recent issues and merged pull requests against the objective mapping. + +This gives a quick local historical view of what kinds of work the repository +has been closing or merging under the current objective mapping. 
+ +Examples: + ` + string(constants.CLIExtensionPrefix) + ` outcomes history + ` + string(constants.CLIExtensionPrefix) + ` outcomes history --source issues --limit 100 + ` + string(constants.CLIExtensionPrefix) + ` outcomes history --repo owner/repo --json`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + jsonOutput, _ := cmd.Flags().GetBool("json") + repoOverride, _ := cmd.Flags().GetString("repo") + limit, _ := cmd.Flags().GetInt("limit") + source, _ := cmd.Flags().GetString("source") + + return RunOutcomesHistory(OutcomesHistoryConfig{ + RepoOverride: repoOverride, + JSONOutput: jsonOutput, + Limit: limit, + Source: source, + }) + }, + } + + addJSONFlag(cmd) + addRepoFlag(cmd) + cmd.Flags().Int("limit", 200, "Maximum number of items to inspect per source") + cmd.Flags().String("source", historySourceAll, "History source to inspect: issues, prs, or all") + + return cmd +} + +func RunOutcomesHistory(config OutcomesHistoryConfig) error { + repo := config.RepoOverride + if repo == "" { + slug, err := GetCurrentRepoSlug() + if err != nil { + return fmt.Errorf("could not determine repository: %w", err) + } + repo = slug + } + + if config.Limit <= 0 { + config.Limit = 200 + } + + source := strings.ToLower(strings.TrimSpace(config.Source)) + if source == "" { + source = historySourceAll + } + if source != historySourceAll && source != historySourceIssues && source != historySourcePRs { + return fmt.Errorf("invalid --source %q: expected issues, prs, or all", config.Source) + } + + mapping := ghmapping.LoadObjectiveMappingFromConfig() + data := historicalObjectivesData{Repo: repo, Limit: config.Limit} + + if source == historySourceAll || source == historySourceIssues { + issues, err := fetchHistoricalGitHubItems(repo, config.Limit, historySourceIssues) + if err != nil { + return err + } + report := buildHistoricalObjectiveReport(historySourceIssues, issues, mapping) + data.Issues = &report + } + + if source == historySourceAll || source == 
historySourcePRs { + prs, err := fetchHistoricalGitHubItems(repo, config.Limit, historySourcePRs) + if err != nil { + return err + } + report := buildHistoricalObjectiveReport(historySourcePRs, prs, mapping) + data.PRs = &report + } + + if config.JSONOutput { + out, err := json.MarshalIndent(data, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal JSON: %w", err) + } + fmt.Fprintln(os.Stdout, string(out)) + return nil + } + + fmt.Fprintf(os.Stderr, "Objective history for %s (limit %d)\n", repo, config.Limit) + if data.Issues != nil { + renderHistoricalObjectiveReport(*data.Issues) + } + if data.PRs != nil { + renderHistoricalObjectiveReport(*data.PRs) + } + + return nil +} + +func fetchHistoricalGitHubItems(repo string, limit int, source string) ([]historicalGitHubItem, error) { + args := []string{"--repo", repo, "--limit", fmt.Sprintf("%d", limit), "--json", "number,title,labels,url"} + spinner := "Listing closed issues..." + command := []string{"issue", "list", "--state", "closed"} + + if source == historySourcePRs { + spinner = "Listing merged pull requests..." + command = []string{"pr", "list", "--state", "merged"} + args[len(args)-1] = "number,title,labels,url,mergedAt" + } else { + args[len(args)-1] = "number,title,labels,url,closedAt" + } + + output, err := outcomesHistoryRunGH(spinner, append(command, args...)...) 
+ if err != nil { + return nil, fmt.Errorf("failed to list %s for %s: %w", source, repo, err) + } + + var items []historicalGitHubItem + if err := json.Unmarshal(output, &items); err != nil { + return nil, fmt.Errorf("failed to parse %s listing JSON: %w", source, err) + } + return items, nil +} + +func buildHistoricalObjectiveReport(source string, items []historicalGitHubItem, mapping *ghmapping.ObjectiveMapping) historicalObjectiveReport { + rows := make([]historicalObjectiveItem, 0, len(items)) + bucketCounts := map[string]int{} + totalObjectiveValue := 0 + scoredItems := 0 + + for _, item := range items { + labels := make([]string, 0, len(item.Labels)) + for _, label := range item.Labels { + labels = append(labels, label.Name) + } + + objectiveLabels := mapping.GetObjectiveLabels(labels) + objectiveValue := mapping.ComputeObjectiveValue(labels) + if objectiveValue > 0 { + scoredItems++ + } + totalObjectiveValue += objectiveValue + + for _, label := range objectiveLabels { + normalized := strings.ToLower(strings.TrimSpace(label)) + bucketCounts[normalized]++ + } + + rows = append(rows, historicalObjectiveItem{ + Kind: source, + Number: item.Number, + Title: item.Title, + URL: item.URL, + ClosedAt: item.ClosedAt, + MergedAt: item.MergedAt, + ObjectiveLabels: objectiveLabels, + ObjectiveValue: objectiveValue, + }) + } + + buckets := make([]historicalObjectiveBucket, 0, len(bucketCounts)) + for label, count := range bucketCounts { + mappedValue := mapping.LabelToValue[label] + buckets = append(buckets, historicalObjectiveBucket{ + Label: label, + Count: count, + MappedValue: mappedValue, + ContributedValue: mappedValue * count, + }) + } + + sort.Slice(buckets, func(i, j int) bool { + if buckets[i].ContributedValue != buckets[j].ContributedValue { + return buckets[i].ContributedValue > buckets[j].ContributedValue + } + if buckets[i].Count != buckets[j].Count { + return buckets[i].Count > buckets[j].Count + } + if buckets[i].MappedValue != buckets[j].MappedValue { + 
return buckets[i].MappedValue > buckets[j].MappedValue + } + return buckets[i].Label < buckets[j].Label + }) + + sort.Slice(rows, func(i, j int) bool { + if rows[i].ObjectiveValue != rows[j].ObjectiveValue { + return rows[i].ObjectiveValue > rows[j].ObjectiveValue + } + leftTime := rows[i].ClosedAt + if leftTime == "" { + leftTime = rows[i].MergedAt + } + rightTime := rows[j].ClosedAt + if rightTime == "" { + rightTime = rows[j].MergedAt + } + return leftTime < rightTime + }) + + representative := make([]historicalObjectiveItem, 0, min(len(rows), 15)) + for _, row := range rows { + if row.ObjectiveValue <= 0 { + continue + } + representative = append(representative, row) + if len(representative) == 15 { + break + } + } + + return historicalObjectiveReport{ + Source: source, + SampleSize: len(items), + ScoredItems: scoredItems, + TotalObjectiveValue: totalObjectiveValue, + ObjectiveBuckets: buckets, + RepresentativeItems: representative, + } +} + +func renderHistoricalObjectiveReport(report historicalObjectiveReport) { + fmt.Fprintf(os.Stderr, "\n%s\n", strings.ToUpper(report.Source)) + fmt.Fprintf(os.Stderr, " Sample size: %d\n", report.SampleSize) + fmt.Fprintf(os.Stderr, " Scored items: %d\n", report.ScoredItems) + fmt.Fprintf(os.Stderr, " Total objective value: %d\n", report.TotalObjectiveValue) + + if len(report.ObjectiveBuckets) > 0 { + fmt.Fprintln(os.Stderr, " Top objective buckets:") + for _, bucket := range report.ObjectiveBuckets[:min(len(report.ObjectiveBuckets), 8)] { + fmt.Fprintf(os.Stderr, " %-22s %3d x %3d = %4d\n", bucket.Label, bucket.Count, bucket.MappedValue, bucket.ContributedValue) + } + } + + if len(report.RepresentativeItems) > 0 { + fmt.Fprintln(os.Stderr, " Representative items:") + for _, item := range report.RepresentativeItems[:min(len(report.RepresentativeItems), 5)] { + fmt.Fprintf(os.Stderr, " #%d %-3d %s\n", item.Number, item.ObjectiveValue, item.Title) + } + } +} diff --git a/pkg/cli/outcomes_history_test.go 
b/pkg/cli/outcomes_history_test.go new file mode 100644 index 00000000000..0f6c88afa86 --- /dev/null +++ b/pkg/cli/outcomes_history_test.go @@ -0,0 +1,138 @@ +//go:build !integration + +package cli + +import ( + "encoding/json" + "io" + "os" + "strings" + "testing" + + ghmapping "github.com/github/gh-aw/pkg/github" + "github.com/spf13/cobra" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewOutcomesCommand_AddsHistorySubcommand(t *testing.T) { + cmd := NewOutcomesCommand() + require.NotNil(t, cmd) + + historyCmd, _, err := cmd.Find([]string{"history"}) + require.NoError(t, err) + assert.Equal(t, "history", historyCmd.Name()) +} + +func TestBuildHistoricalObjectiveReport(t *testing.T) { + mapping := &ghmapping.ObjectiveMapping{ + LabelToValue: map[string]int{ + "automation": 40, + "testing": 65, + "observability": 70, + }, + MultiLabelLogic: "max", + } + + items := []historicalGitHubItem{ + { + Number: 1, + Title: "Automation only", + URL: "https://example.com/1", + ClosedAt: "2026-06-01T00:00:00Z", + Labels: []struct { + Name string "json:\"name\"" + }{{Name: "automation"}}, + }, + { + Number: 2, + Title: "Observability with testing", + URL: "https://example.com/2", + ClosedAt: "2026-06-02T00:00:00Z", + Labels: []struct { + Name string "json:\"name\"" + }{{Name: "observability"}, {Name: "testing"}}, + }, + { + Number: 3, + Title: "No mapped labels", + URL: "https://example.com/3", + ClosedAt: "2026-06-03T00:00:00Z", + Labels: []struct { + Name string "json:\"name\"" + }{{Name: "docs"}}, + }, + } + + report := buildHistoricalObjectiveReport(historySourceIssues, items, mapping) + + assert.Equal(t, historySourceIssues, report.Source) + assert.Equal(t, 3, report.SampleSize) + assert.Equal(t, 2, report.ScoredItems) + assert.Equal(t, 110, report.TotalObjectiveValue) + require.Len(t, report.ObjectiveBuckets, 3) + assert.Equal(t, "observability", report.ObjectiveBuckets[0].Label) + assert.Equal(t, 70, 
report.ObjectiveBuckets[0].ContributedValue) + assert.Equal(t, "testing", report.ObjectiveBuckets[1].Label) + assert.Equal(t, "automation", report.ObjectiveBuckets[2].Label) + require.Len(t, report.RepresentativeItems, 2) + assert.Equal(t, 2, report.RepresentativeItems[0].Number) + assert.Equal(t, 1, report.RepresentativeItems[1].Number) +} + +func TestRunOutcomesHistory_JSON(t *testing.T) { + oldRunGH := outcomesHistoryRunGH + defer func() { outcomesHistoryRunGH = oldRunGH }() + + oldStdout := os.Stdout + r, w, err := os.Pipe() + require.NoError(t, err) + os.Stdout = w + defer func() { os.Stdout = oldStdout }() + + outcomesHistoryRunGH = func(spinnerMessage string, args ...string) ([]byte, error) { + if len(args) >= 2 && args[0] == "issue" && args[1] == "list" { + return []byte(`[ + {"number":101,"title":"Issue one","url":"https://example.com/issues/101","closedAt":"2026-06-08T00:00:00Z","labels":[{"name":"automation"}]} + ]`), nil + } + if len(args) >= 2 && args[0] == "pr" && args[1] == "list" { + return []byte(`[ + {"number":202,"title":"PR two","url":"https://example.com/pull/202","mergedAt":"2026-06-08T00:00:00Z","labels":[{"name":"testing"}]} + ]`), nil + } + return nil, assert.AnError + } + + require.NoError(t, os.Setenv("OBJECTIVE_MAPPING_JSON", `{"label_to_value":{"automation":40,"testing":65},"multi_label_logic":"max"}`)) + defer os.Unsetenv("OBJECTIVE_MAPPING_JSON") + + err = RunOutcomesHistory(OutcomesHistoryConfig{RepoOverride: "owner/repo", JSONOutput: true, Limit: 10, Source: historySourceAll}) + require.NoError(t, err) + require.NoError(t, w.Close()) + + output, err := io.ReadAll(r) + require.NoError(t, err) + + var data historicalObjectivesData + require.NoError(t, json.Unmarshal(output, &data)) + require.NotNil(t, data.Issues) + require.NotNil(t, data.PRs) + assert.Equal(t, 40, data.Issues.TotalObjectiveValue) + assert.Equal(t, 65, data.PRs.TotalObjectiveValue) +} + +func TestNewOutcomesHistorySubcommand_InheritsGlobalVerboseFlag(t *testing.T) { + 
cmd := NewOutcomesHistorySubcommand() + require.NotNil(t, cmd) + + assert.Nil(t, cmd.Flags().Lookup("verbose")) + + root := &cobra.Command{Use: "gh aw"} + root.PersistentFlags().BoolP("verbose", "v", false, "Enable verbose output showing detailed information") + root.AddCommand(cmd) + + inherited := cmd.InheritedFlags().Lookup("verbose") + require.NotNil(t, inherited) + assert.True(t, strings.Contains(inherited.Usage, "verbose output")) +} diff --git a/pkg/github/label_objective_mapping.go b/pkg/github/label_objective_mapping.go new file mode 100644 index 00000000000..8e9fbab7c78 --- /dev/null +++ b/pkg/github/label_objective_mapping.go @@ -0,0 +1,223 @@ +package github + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/github/gh-aw/pkg/logger" +) + +var labelObjectiveMappingLog = logger.New("github:label_objective_mapping") + +// ObjectiveMapping defines how GitHub labels map to numeric objective values. +// This enables any label to be assigned a configurable numeric value, with one central definition place. +type ObjectiveMapping struct { + // LabelToValue maps label names (case-insensitive) to numeric values. + // Example: {"high-priority": 50, "copilot-opt": 50, "critical": 100, "p0": 100, "p1": 50} + LabelToValue map[string]int `json:"label_to_value"` + + // MultiLabelLogic determines how multiple matching labels are combined: + // "sum" = add all matching label values + // "max" = take the highest value (default) + // "first" = use the first match in priority order + MultiLabelLogic string `json:"multi_label_logic"` + + // PriorityLabels defines evaluation order when logic is "first" + // Used to establish precedence when multiple labels match + PriorityLabels []string `json:"priority_labels,omitempty"` +} + +// ComputeObjectiveValue calculates the numeric value for an issue based on its labels. +// Returns 0 if no labels match or if mapping is nil. 
+func (om *ObjectiveMapping) ComputeObjectiveValue(issueLabels []string) int { + if om == nil || len(om.LabelToValue) == 0 { + return 0 + } + + if len(issueLabels) == 0 { + return 0 + } + + matchingValues := []int{} + matchedLabels := []string{} + + for _, label := range issueLabels { + normalizedLabel := strings.ToLower(strings.TrimSpace(label)) + if val, ok := om.LabelToValue[normalizedLabel]; ok { + matchingValues = append(matchingValues, val) + matchedLabels = append(matchedLabels, label) + } + } + + if len(matchingValues) == 0 { + return 0 + } + + logic := om.MultiLabelLogic + if logic == "" { + logic = "max" // default + } + + switch logic { + case "sum": + total := 0 + for _, v := range matchingValues { + total += v + } + labelObjectiveMappingLog.Printf("Computed objective value via sum: labels=%v, value=%d", matchedLabels, total) + return total + + case "first": + // Return first issue label that's in priority_labels + if len(om.PriorityLabels) > 0 { + for _, issueLabel := range issueLabels { + for _, priorityLabel := range om.PriorityLabels { + if strings.EqualFold(issueLabel, priorityLabel) { + normalizedIssue := strings.ToLower(strings.TrimSpace(issueLabel)) + if val, ok := om.LabelToValue[normalizedIssue]; ok { + labelObjectiveMappingLog.Printf("Computed objective value via issue label priority: label=%s, value=%d", issueLabel, val) + return val + } + } + } + } + } + // Fallback to first matching label + result := matchingValues[0] + labelObjectiveMappingLog.Printf("Computed objective value via first match: labels=%v, value=%d", matchedLabels, result) + return result + + default: // "max" + maxVal := matchingValues[0] + for _, v := range matchingValues { + if v > maxVal { + maxVal = v + } + } + labelObjectiveMappingLog.Printf("Computed objective value via max: labels=%v, value=%d", matchedLabels, maxVal) + return maxVal + } +} + +// DefaultObjectiveMapping returns the built-in default label-to-value mapping. 
+func DefaultObjectiveMapping() *ObjectiveMapping { + return &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "p0": 100, + "high-priority": 50, + "copilot-opt": 50, + "p1": 50, + "security-fix": 75, + "p2": 25, + "medium-priority": 25, + "performance": 30, + "p3": 10, + "low-priority": 10, + "documentation": 5, + }, + MultiLabelLogic: "max", + PriorityLabels: []string{"critical", "p0", "copilot-opt", "high-priority", "security-fix", "p1", "performance"}, + } +} + +// LoadObjectiveMappingFromConfig loads the mapping from environment, config file, or defaults. +// Precedence: +// 1. OBJECTIVE_MAPPING_JSON environment variable +// 2. .github/objective-mapping.json file +// 3. Built-in defaults +func LoadObjectiveMappingFromConfig() *ObjectiveMapping { + labelObjectiveMappingLog.Print("Loading objective mapping configuration") + + // Try loading from OBJECTIVE_MAPPING_JSON env var + if mappingJSON := os.Getenv("OBJECTIVE_MAPPING_JSON"); mappingJSON != "" { + labelObjectiveMappingLog.Print("Attempting to load from OBJECTIVE_MAPPING_JSON env var") + var om ObjectiveMapping + if err := json.Unmarshal([]byte(mappingJSON), &om); err == nil { + labelObjectiveMappingLog.Printf("Loaded mapping from env var: %d labels", len(om.LabelToValue)) + return &om + } else { + labelObjectiveMappingLog.Printf("Failed to parse OBJECTIVE_MAPPING_JSON: %v", err) + } + } + + configPath := filepath.Join(".github", "objective-mapping.json") + if data, err := os.ReadFile(configPath); err == nil { + labelObjectiveMappingLog.Printf("Attempting to load from %s", configPath) + var om ObjectiveMapping + if err := json.Unmarshal(data, &om); err == nil { + labelObjectiveMappingLog.Printf("Loaded mapping from config file: %d labels", len(om.LabelToValue)) + return &om + } + labelObjectiveMappingLog.Printf("Failed to parse config file: %v", err) + } + + // Return default mapping + defaults := DefaultObjectiveMapping() + labelObjectiveMappingLog.Printf("Using default mapping: %d 
labels", len(defaults.LabelToValue)) + return defaults +} + +// GetObjectiveLabels returns the subset of issue labels that have objective values. +// Also returns the labels in the order they appear in the issue's label list. +func (om *ObjectiveMapping) GetObjectiveLabels(issueLabels []string) []string { + if om == nil || len(om.LabelToValue) == 0 { + return []string{} + } + + result := make([]string, 0) + for _, label := range issueLabels { + normalizedLabel := strings.ToLower(strings.TrimSpace(label)) + if _, ok := om.LabelToValue[normalizedLabel]; ok { + result = append(result, label) + } + } + + return result +} + +// MarshalJSON implements json.Marshaler to ensure consistent output. +func (om *ObjectiveMapping) MarshalJSON() ([]byte, error) { + type Alias ObjectiveMapping + return json.MarshalIndent(&struct { + *Alias + }{ + Alias: (*Alias)(om), + }, "", " ") +} + +// String returns a human-readable summary of the mapping. +func (om *ObjectiveMapping) String() string { + if om == nil { + return "nil ObjectiveMapping" + } + return fmt.Sprintf("ObjectiveMapping{labels: %d, logic: %s, priorities: %d}", + len(om.LabelToValue), om.MultiLabelLogic, len(om.PriorityLabels)) +} + +// ValidateLabelExists checks if a given label has a defined objective value. +func (om *ObjectiveMapping) ValidateLabelExists(label string) bool { + if om == nil { + return false + } + normalizedLabel := strings.ToLower(strings.TrimSpace(label)) + _, exists := om.LabelToValue[normalizedLabel] + return exists +} + +// GetAllLabels returns all labels defined in the mapping (sorted). 
+func (om *ObjectiveMapping) GetAllLabels() []string { + if om == nil { + return []string{} + } + var labels []string + for label := range om.LabelToValue { + labels = append(labels, label) + } + slices.Sort(labels) + return labels +} diff --git a/pkg/github/label_objective_mapping_constants.go b/pkg/github/label_objective_mapping_constants.go new file mode 100644 index 00000000000..dfb0f66cac1 --- /dev/null +++ b/pkg/github/label_objective_mapping_constants.go @@ -0,0 +1,139 @@ +package github + +// DefaultObjectiveLabelValues defines the built-in label-to-value mappings +// specifically tailored for gh-aw (GitHub Agentic Workflows). +// +// These mappings reflect the actual work domains and priorities: +// - Safety/Reliability: Safe outputs, testing, reliability = critical +// - Core engine: Compilation, parsing, workflow execution = critical +// - Integration: MCP tools, GitHub Actions, CLI = important +// - Quality: Bug fixes, performance, linting = important +// - Enhancement: New features, documentation = valuable but lower impact +// +// To customize these mappings: +// 1. Create .github/objective-mapping.json in your repository root +// 2. Set OBJECTIVE_MAPPING_JSON environment variable +// 3. 
See docs/label-objective-mapping.md for configuration guide +// +// Critical Priority Labels +const ( + ObjectiveLabelCritical = "critical" + ObjectiveLabelP0 = "p0" + ObjectiveValueCritical = 100 + ObjectiveValueP0 = 100 +) + +// Safety-Critical Work (safe outputs, test failures) +const ( + ObjectiveLabelTesting = "testing" + ObjectiveLabelReliability = "reliability" + ObjectiveValueTesting = 50 + ObjectiveValueReliability = 50 +) + +// Core Engine & Compilation +const ( + ObjectiveLabelWorkflow = "workflow" + ObjectiveLabelEngine = "engine" + ObjectiveValueWorkflow = 45 + ObjectiveValueEngine = 40 +) + +// Integration Points +const ( + ObjectiveLabelMCP = "mcp" + ObjectiveLabelActions = "actions" + ObjectiveLabelCLI = "cli" + ObjectiveValueMCP = 45 + ObjectiveValueActions = 40 + ObjectiveValueCLI = 40 +) + +// Bug Fixes (especially core path) +const ( + ObjectiveLabelBug = "bug" + ObjectiveValueBug = 60 +) + +// Security +const ( + ObjectiveLabelSecurityFix = "security-fix" + ObjectiveValueSecurityFix = 70 +) + +// Copilot-Specific Optimizations +const ( + ObjectiveLabelCopilotOpt = "copilot-opt" + ObjectiveValueCopilotOpt = 75 +) + +// High Priority Work +const ( + ObjectiveLabelHighPriority = "high-priority" + ObjectiveLabelP1 = "p1" + ObjectiveValueHighPriority = 35 + ObjectiveValueP1 = 35 +) + +// Code Quality +const ( + ObjectiveLabelLintMonster = "lint-monster" + ObjectiveValueLintMonster = 25 + ObjectiveLabelPerformance = "performance" + ObjectiveValuePerformance = 30 +) + +// Medium Priority Work +const ( + ObjectiveLabelMediumPriority = "medium-priority" + ObjectiveLabelP2 = "p2" + ObjectiveValueMediumPriority = 20 + ObjectiveValueP2 = 20 +) + +// Dependency Management +const ( + ObjectiveLabelDependencies = "dependencies" + ObjectiveValueDependencies = 10 +) + +// Low Priority Work +const ( + ObjectiveLabelLowPriority = "low-priority" + ObjectiveLabelP3 = "p3" + ObjectiveValueLowPriority = 10 + ObjectiveValueP3 = 10 +) + +// Enhancement & Documentation 
+const ( + ObjectiveLabelEnhancement = "enhancement" + ObjectiveValueEnhancement = 15 + ObjectiveLabelDocumentation = "documentation" + ObjectiveValueDocumentation = 5 +) + +// Workflow/Automation Labels (no objective value) +const ( + ObjectiveLabelAIGenerated = "ai-generated" + ObjectiveValueAIGenerated = 0 + ObjectiveLabelAIInspected = "ai-inspected" + ObjectiveValueAIInspected = 0 + ObjectiveLabelSmokeCopilot = "smoke-copilot" + ObjectiveValueSmokeCopilot = 0 +) + +// Question & Community Labels (no objective value) +const ( + ObjectiveLabelQuestion = "question" + ObjectiveValueQuestion = 0 + ObjectiveLabelGoodFirstIssue = "good first issue" + ObjectiveValueGoodFirstIssue = 0 +) + +// Combination logic options +const ( + MultiLabelLogicMax = "max" // Use highest value (default) + MultiLabelLogicSum = "sum" // Add all values + MultiLabelLogicFirst = "first" // Use first in priority order +) diff --git a/pkg/github/label_objective_mapping_test.go b/pkg/github/label_objective_mapping_test.go new file mode 100644 index 00000000000..b92ff060f1d --- /dev/null +++ b/pkg/github/label_objective_mapping_test.go @@ -0,0 +1,319 @@ +package github + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestObjectiveMapping_ComputeObjectiveValue_Max(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "high-priority": 50, + "medium": 25, + }, + MultiLabelLogic: "max", + } + + tests := []struct { + name string + labels []string + expected int + }{ + {"no labels", []string{}, 0}, + {"no matches", []string{"unknown"}, 0}, + {"single match", []string{"high-priority"}, 50}, + {"multiple matches - max wins", []string{"medium", "high-priority"}, 50}, + {"all matches - highest wins", []string{"critical", "high-priority", "medium"}, 100}, + {"case insensitive", []string{"Critical", "HIGH-PRIORITY"}, 100}, + {"whitespace trimmed", 
[]string{" high-priority ", " medium "}, 50}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := mapping.ComputeObjectiveValue(tt.labels) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestObjectiveMapping_ComputeObjectiveValue_Sum(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "high-priority": 50, + "medium": 25, + }, + MultiLabelLogic: "sum", + } + + tests := []struct { + name string + labels []string + expected int + }{ + {"no labels", []string{}, 0}, + {"no matches", []string{"unknown"}, 0}, + {"single match", []string{"high-priority"}, 50}, + {"multiple matches - sum", []string{"medium", "high-priority"}, 75}, + {"all matches - sum all", []string{"critical", "high-priority", "medium"}, 175}, + {"duplicate match", []string{"high-priority", "high-priority"}, 100}, // sum counts both + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := mapping.ComputeObjectiveValue(tt.labels) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestObjectiveMapping_ComputeObjectiveValue_First(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "high-priority": 50, + "medium": 25, + "low": 10, + }, + MultiLabelLogic: "first", + PriorityLabels: []string{"critical", "high-priority", "medium"}, + } + + tests := []struct { + name string + labels []string + expected int + }{ + {"no labels", []string{}, 0}, + {"critical first in priority", []string{"low", "critical", "high-priority"}, 100}, + {"high-priority first in issue labels", []string{"high-priority", "critical"}, 50}, + {"no match in priority, fallback to first", []string{"medium"}, 25}, + {"no priority match, use any", []string{"low"}, 10}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := mapping.ComputeObjectiveValue(tt.labels) + assert.Equal(t, tt.expected, result) + }) + } +} + +func 
TestObjectiveMapping_ComputeObjectiveValue_Nil(t *testing.T) { + var mapping *ObjectiveMapping + assert.Equal(t, 0, mapping.ComputeObjectiveValue([]string{"any"})) +} + +func TestObjectiveMapping_ComputeObjectiveValue_Empty(t *testing.T) { + mapping := &ObjectiveMapping{} + assert.Equal(t, 0, mapping.ComputeObjectiveValue([]string{"any"})) +} + +func TestObjectiveMapping_GetObjectiveLabels(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "high-priority": 50, + }, + } + + tests := []struct { + name string + labels []string + expected []string + }{ + {"no labels", []string{}, []string{}}, + {"no matches", []string{"unknown", "other"}, []string{}}, + {"single match", []string{"critical", "unknown"}, []string{"critical"}}, + {"multiple matches", []string{"unknown", "critical", "other", "high-priority"}, []string{"critical", "high-priority"}}, + {"case preserved in output", []string{"CRITICAL", "High-Priority"}, []string{"CRITICAL", "High-Priority"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := mapping.GetObjectiveLabels(tt.labels) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestDefaultObjectiveMapping(t *testing.T) { + mapping := DefaultObjectiveMapping() + require.NotNil(t, mapping) + assert.Greater(t, len(mapping.LabelToValue), 0) + assert.Equal(t, "max", mapping.MultiLabelLogic) + assert.Greater(t, len(mapping.PriorityLabels), 0) + + // Verify some expected labels + assert.Equal(t, 100, mapping.LabelToValue["critical"]) + assert.Equal(t, 100, mapping.LabelToValue["p0"]) + assert.Equal(t, 50, mapping.LabelToValue["high-priority"]) + assert.Equal(t, 50, mapping.LabelToValue["copilot-opt"]) +} + +func TestObjectiveMapping_MarshalJSON(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "high": 50, + }, + MultiLabelLogic: "max", + PriorityLabels: []string{"critical"}, + } + + data, err := json.Marshal(mapping) + 
require.NoError(t, err) + + var unmarshaled ObjectiveMapping + err = json.Unmarshal(data, &unmarshaled) + require.NoError(t, err) + + assert.Equal(t, mapping.LabelToValue, unmarshaled.LabelToValue) + assert.Equal(t, mapping.MultiLabelLogic, unmarshaled.MultiLabelLogic) + assert.Equal(t, mapping.PriorityLabels, unmarshaled.PriorityLabels) +} + +func TestObjectiveMapping_ValidateLabelExists(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "critical": 100, + "high": 50, + }, + } + + assert.True(t, mapping.ValidateLabelExists("critical")) + assert.True(t, mapping.ValidateLabelExists("CRITICAL")) + assert.True(t, mapping.ValidateLabelExists(" critical ")) + assert.False(t, mapping.ValidateLabelExists("unknown")) + assert.False(t, (*ObjectiveMapping)(nil).ValidateLabelExists("any")) +} + +func TestObjectiveMapping_GetAllLabels(t *testing.T) { + mapping := &ObjectiveMapping{ + LabelToValue: map[string]int{ + "zebra": 10, + "apple": 20, + "banana": 15, + }, + } + + labels := mapping.GetAllLabels() + assert.Equal(t, []string{"apple", "banana", "zebra"}, labels) // sorted +} + +func TestObjectiveMapping_String(t *testing.T) { + mapping := DefaultObjectiveMapping() + str := mapping.String() + assert.Contains(t, str, "ObjectiveMapping") + assert.Contains(t, str, "max") + + var nilMapping *ObjectiveMapping + assert.Equal(t, "nil ObjectiveMapping", nilMapping.String()) +} + +func TestLoadObjectiveMappingFromConfig_EnvVar(t *testing.T) { + // Save original env + originalEnv := os.Getenv("OBJECTIVE_MAPPING_JSON") + defer os.Setenv("OBJECTIVE_MAPPING_JSON", originalEnv) + + // Set test env var + testMapping := `{"label_to_value": {"test-label": 42}, "multi_label_logic": "sum"}` + os.Setenv("OBJECTIVE_MAPPING_JSON", testMapping) + + mapping := LoadObjectiveMappingFromConfig() + require.NotNil(t, mapping) + assert.Equal(t, 42, mapping.LabelToValue["test-label"]) + assert.Equal(t, "sum", mapping.MultiLabelLogic) +} + +func 
TestLoadObjectiveMappingFromConfig_Default(t *testing.T) { + // Clear env to ensure fallback to default + originalEnv := os.Getenv("OBJECTIVE_MAPPING_JSON") + defer os.Setenv("OBJECTIVE_MAPPING_JSON", originalEnv) + os.Setenv("OBJECTIVE_MAPPING_JSON", "") + + mapping := LoadObjectiveMappingFromConfig() + require.NotNil(t, mapping) + assert.Greater(t, len(mapping.LabelToValue), 0) + assert.Equal(t, "max", mapping.MultiLabelLogic) +} + +func TestLoadObjectiveMappingFromConfig_GitHubPathPreferred(t *testing.T) { + originalEnv := os.Getenv("OBJECTIVE_MAPPING_JSON") + defer os.Setenv("OBJECTIVE_MAPPING_JSON", originalEnv) + os.Setenv("OBJECTIVE_MAPPING_JSON", "") + + originalWD, err := os.Getwd() + require.NoError(t, err) + defer func() { + require.NoError(t, os.Chdir(originalWD)) + }() + + tempDir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(tempDir, ".github"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".github", "objective-mapping.json"), []byte(`{"label_to_value": {"github-path": 99}, "multi_label_logic": "max"}`), 0o644)) + require.NoError(t, os.Chdir(tempDir)) + + mapping := LoadObjectiveMappingFromConfig() + require.NotNil(t, mapping) + assert.Equal(t, 99, mapping.LabelToValue["github-path"]) + assert.Equal(t, "max", mapping.MultiLabelLogic) +} + +func TestLoadObjectiveMappingFromConfig_IgnoresLegacyPath(t *testing.T) { + originalEnv := os.Getenv("OBJECTIVE_MAPPING_JSON") + defer os.Setenv("OBJECTIVE_MAPPING_JSON", originalEnv) + os.Setenv("OBJECTIVE_MAPPING_JSON", "") + + originalWD, err := os.Getwd() + require.NoError(t, err) + defer func() { + require.NoError(t, os.Chdir(originalWD)) + }() + + tempDir := t.TempDir() + require.NoError(t, os.MkdirAll(filepath.Join(tempDir, ".gh-aw"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(tempDir, ".gh-aw", "objective-mapping.json"), []byte(`{"label_to_value": {"legacy-path": 77}, "multi_label_logic": "sum"}`), 0o644)) + require.NoError(t, os.Chdir(tempDir)) + + mapping := 
LoadObjectiveMappingFromConfig() + require.NotNil(t, mapping) + assert.NotContains(t, mapping.LabelToValue, "legacy-path") + assert.Equal(t, "max", mapping.MultiLabelLogic) +} + +func TestObjectiveMapping_RealWorldScenario(t *testing.T) { + // Test realistic scenario from impact efficiency report + mapping := DefaultObjectiveMapping() + + // Scenario 1: high-priority issue (like in issue #38040) + labels1 := []string{"high-priority"} + value1 := mapping.ComputeObjectiveValue(labels1) + assert.Equal(t, 50, value1) + + // Scenario 2: critical security fix with multiple labels + labels2 := []string{"security-fix", "bug", "high-priority"} + value2 := mapping.ComputeObjectiveValue(labels2) + assert.Equal(t, 75, value2) // max: security-fix=75 + + // Scenario 3: P0 critical issue + labels3 := []string{"p0", "critical-bug"} + value3 := mapping.ComputeObjectiveValue(labels3) + assert.Equal(t, 100, value3) // p0=100 + + // Scenario 4: low-priority work + labels4 := []string{"documentation", "low-priority"} + value4 := mapping.ComputeObjectiveValue(labels4) + assert.Equal(t, 10, value4) // max: low-priority=10 + + // Scenario 5: no objective labels + labels5 := []string{"type:bug", "component:cli"} + value5 := mapping.ComputeObjectiveValue(labels5) + assert.Equal(t, 0, value5) +} diff --git a/specs/objective-mapping-portfolio-reporting.md b/specs/objective-mapping-portfolio-reporting.md new file mode 100644 index 00000000000..0c1ad01df82 --- /dev/null +++ b/specs/objective-mapping-portfolio-reporting.md @@ -0,0 +1,574 @@ + +--- +title: Objective Mapping & Portfolio Reporting Specification +version: 1.1.0 +status: Partially Implemented +date: 2026-06-09 +last_updated: 2026-06-10 +--- + +# Objective Mapping & Portfolio Reporting Specification + +This specification defines a reusable label-to-objective-value mapping layer for GitHub work. 
It also outlines later phases where that mapping can be applied to safe output outcomes, root issue tracing, and portfolio-level impact reporting. + +## Implementation Scope + +### Phase 1: Implemented Now + +The current implementation is no longer just the bare mapping layer: + +1. A shared GitHub utility loads `.github/objective-mapping.json` +2. Labels are mapped to numeric objective values through `ObjectiveMapping` +3. CLI outcome reporting enriches outcomes with `objective_value`, `objective_labels`, and `traced_root_url` +4. Pull request outcomes trace to linked closing issues before objective values are computed +5. Outcome summaries and per-objective breakdowns aggregate attempted and accepted objective value + +This phase gives GitHub work a single configurable impact vocabulary and already supports basic root-aware outcome measurement. + +### Phase 2+: Later Extensions + +The rest of this document describes natural extensions that may be added later: + +1. Root tracing beyond the current PR-to-closing-issue path, including epic resolution +2. Campaign-level aggregation and filtering in dedicated reports +3. Portfolio reporting workflows that consume the existing objective-enriched outcome data +4. Cost-aware efficiency metrics using real AI Credits data +5. Strategic analysis over delivered impact + +Those extensions are design targets, not required complexity for trying the mapping itself. + +## Overview: Impact Measurement Through Root Cause Tracing + +Longer term, objective mapping answers: **What impact did we create?** By connecting work back to the root problems it solves. + +For the MVP, the answer is simpler: **what value do these labels represent?** + +For the extended model, that grows into: + +1. **Root Problems** — Issues and epics represent actual business objectives (with PM-assigned impact values) +2. **Tracing** — Safe outputs (PRs, comments) trace backward to root issues/epics +3. 
**Impact Scoring** — Objectives are assigned to root problems, not intermediate artifacts +4. **Portfolio Reporting** — Aggregates impact by objective to show which business goals are being achieved + +This enables questions like: +- **What high-priority problems did we solve?** (Trace accepted PRs back to root issues, check labels) +- **Which initiatives made the most progress?** (Aggregate impact by epic) +- **What value did we create per objective?** (Sum impact values for accepted outcomes) +- **What's our ROI by problem domain?** (Efficiency = accepted value / attempted value) + +## Extended Architecture: Trace → Root → Map → Aggregate + +This architecture describes the fuller impact model beyond the current MVP mapping layer. + +### Components + +1. **Root Tracing** — Traces safe outputs (PRs, issues) back to root issue or epic +2. **ObjectiveMapping** — Maps root issue/epic labels to numeric impact values +3. **Configuration File** — `.github/objective-mapping.json` with objective values +4. **Outcome Enrichment** — GitHub API queries to fetch root objects and their labels +5. **Portfolio Report** — Aggregates impact by objective, showing what value was created + +### Design Principles + +1. **Root Source of Truth** — Objectives are assigned to root issues/epics, not PRs or comments +2. **Traceability** — All work must trace back through GitHub's native linking (PR → issue) +3. **Centralized Configuration** — Single source of truth at `.github/objective-mapping.json` +4. **PM-Assigned Impact** — Labels on root objects represent business priorities +5. **Portfolio Visibility** — Aggregates show what problems were solved, what impact created + +## Campaigns & Objective Alignment + +### How Campaigns Work With Objectives + +A **campaign** is a bounded initiative organized around specific business objectives. 
Examples: + +- **"Q2 Performance Month"** — Campaign to improve latency (objective label: `initiative-performance`) +- **"Auth System Redesign"** — Major initiative (objective label: `epic-auth`) +- **"Critical Bug Fixes"** — Campaign to resolve urgent issues (objective label: `critical`) +- **"Testing Infrastructure"** — Initiative to improve test coverage (objective label: `testing`) + +### Campaign → Objectives → Root Issues → Impact Measurement + +``` +Campaign: "Q2 Performance Month" + ↓ +Assigned Objectives: initiative-performance (300 points) + ↓ +Root Issues Created: #1234, #5678, #9012 +(all labeled with "initiative-performance") + ↓ +Agent Creates PRs & Reviews (safe outputs) + ↓ +Safe Outputs Accepted/Rejected + ↓ +[Root Tracing] Trace PRs back to root issues + ↓ +[Objective Mapping] Fetch labels from root issues (initiative-performance) + ↓ +[Portfolio Report] Aggregate by campaign objectives: + Total attempted: 1500 points (5 PRs × 300 points each) + Delivered: 1200 points (4 accepted × 300 points) + Efficiency: 80% → Campaign on track, good progress +``` + +### Campaign Reporting + +Portfolio reports can be filtered by campaign to answer: + +| Question | Answer | Example | +|----------|--------|---------| +| **How is this campaign doing?** | Efficiency metric | "Performance Month: 75% delivered (1500/2000 points)" | +| **Which campaigns succeeded?** | High efficiency campaigns | "Auth redesign: 90% complete, critical bugs: 85% complete" | +| **Which need intervention?** | Low efficiency campaigns | "Low-priority features: only 40% delivered, investigate blocker" | +| **What's total campaign impact?** | Sum all delivered objectives | "This quarter we delivered 12,000 points across 8 campaigns" | + +### Configuration for Campaigns + +Add campaign-level labels to `.github/objective-mapping.json`: + +```json +{ + "label_to_value": { + "epic-auth": 500, + "epic-performance": 300, + "initiative-modernize": 400, + "campaign-q2-testing": 200, + "critical": 
100, + "p0": 100, + "p1": 50 + }, + "multi_label_logic": "max", + "priority_labels": ["epic-auth", "epic-performance", "initiative-modernize", "campaign-q2-testing", "critical"] +} +``` + +**Strategy:** Campaigns typically use `multi_label_logic: "max"` so that a PR addressing both a campaign objective and a critical issue gets the higher value (captures the most important aspect). + +### Connecting Campaigns to Execution + +**Setup:** +1. PM defines campaign with clear objectives (e.g., "Ship auth redesign by EOQ") +2. Assign objective label to campaign (e.g., `epic-auth` with value 500) +3. All root issues in the campaign are labeled with that objective +4. Agent runs workflow against those issues + +**Measurement:** +1. Safe outputs traced back to root issues via PR links +2. Root issues have objective label (`epic-auth`) +3. Portfolio report aggregates by campaign objective +4. Shows: "Campaign delivered 500/600 planned points (83% success)" + +**Strategic Alignment:** +- Campaigns are how business divides work into initiatives +- Objectives are the labels that mark root issues as part of that campaign +- Impact measurement answers: "Did the campaign deliver what was planned?" + +## Configuration + +### File Format + +The configuration file maps objective labels (typically on root issues/epics) to impact values: + +```json +{ + "_comment": "Impact mapping for business objectives. Labels are assigned to root issues/epics by PM/team.", + "label_to_value": { + "epic-auth": 500, + "initiative-performance": 300, + "critical": 100, + "p0": 100, + "p1": 50 + }, + "multi_label_logic": "max", + "priority_labels": ["epic-auth", "initiative-performance", "critical", "p0"] +} +``` + +### Location & Precedence + +Objectives are loaded in this order (first found wins): + +1. **Environment Variable** — `OBJECTIVE_MAPPING_JSON` (the full JSON document as a string; a file path is not accepted, and a value that fails to parse is skipped) +2. **Repository File** — `.github/objective-mapping.json` +3. 
**Built-in Defaults** — Fallback with standard objectives + +### Typical Objective Labels + +These are assigned by PMs/teams to root issues and epics: + +- **Epics** (e.g., `epic-auth`, `initiative-modernize`) — Major initiatives worth 300–500 impact +- **Critical** (e.g., `critical`, `p0`) — Must-fix problems worth 100 impact +- **High-priority** (e.g., `p1`) — Important work worth 50 impact +- **Domains** (e.g., `security`, `performance`) — Strategic focus areas worth 30–80 impact + +### Multi-Label Logic + +When an outcome has multiple objective labels, the system applies one of three strategies: + +| Strategy | Behavior | Use Case | Example | +|----------|----------|----------|---------| +| **max** (default) | Uses highest value | Risk-based prioritization | `[bug, p0]` → 100 | +| **sum** | Adds all values | Cumulative impact | `[performance, workflow]` → 75 | +| **first** | Uses priority order | Organizational hierarchy | `[p0, testing]` → 100 (p0 first) | + +#### Example: Multi-Label Computation + +Given labels `[bug, p0, testing]` with values `{bug: 70, p0: 100, testing: 75}`: + +``` +max: max(70, 100, 75) = 100 +sum: 70 + 100 + 75 = 245 +first: depends on priority_labels order +``` + +## Current Outcome Integration and Remaining Root-Tracing Gaps + +This section describes what is already implemented in the CLI today and what still remains future work. + +### The Problem This Solves + +When a PR is merged (safe output accepted), we need to know: **What business objective did it deliver?** + +- The PR itself may have no labels +- The PR links to one or more issues +- Those issues contain the real business labels +- We must trace PR → issue → get labels → map to impact value + +This is how GitHub always worked: root issue describes the problem, PRs are the solution. + +### Data Flow + +``` +Safe output created (e.g., "create_pull_request") + ↓ +[EvaluateOutcomes] → outcome = "accepted" (merged) or "rejected" (closed) + ↓ +[enrichOutcomeWithObjectiveValue] + 1. 
For PR outcomes: GitHub API trace via closing issues + 2. For direct issue outcomes, or if PR tracing fails: fetch labels from the issue itself + 3. Use labels from the traced root object, not PR labels + 4. Store traced_root_url for audit trail + ↓ +ObjectiveMapping.ComputeObjectiveValue(root_labels) + ↓ +OutcomeReport populated with: + - objective_value: int + - objective_labels: []string + - traced_root_url: string + ↓ [ComputeOutcomeSummary] +OutcomeSummary aggregates: + - total_objective_value (what we attempted) + - accepted_objective_value (what succeeded) + - objective_efficiency (success rate by value) + ↓ [ComputeDomainBreakdowns] +DomainBreakdown per objective: + - attempted: count of work toward this objective + - accepted: count successfully delivered + - total_objective_value: impact points we attempted + - accepted_objective_value: impact points we delivered + - objective_efficiency: % of value we succeeded on +``` + +Current limitation: epic resolution is still a future extension. The implemented trace path is PR → closing issue, with fallback to direct issue labels. 
+ +### Root Resolution Algorithm + +```go +func traceOutcomeRoot(obj GitHubObject, repo string) GitHubObject { + if obj.Type == "PullRequest" { + if len(obj.ClosingIssues) > 0 { + return obj.ClosingIssues[0] + } + } + + return obj +} +``` + +Future extension: + +```go +func traceToRootIssueOrEpic(obj GitHubObject, repo string) GitHubObject { + root := traceOutcomeRoot(obj, repo) + if root.Type == "Issue" && root.EpicLink != nil { + return root.EpicLink + } + return root +} +``` + +### Why Root Tracing Matters + +**Example: PR for "fix auth bug"** + +Without tracing: +- PR has no labels → objective_value = 0 → shows no impact + +With the currently implemented tracing: +- PR links to issue #1234 (labeled `agentic-campaign`, `security`) +- Root issue labels feed the mapping → objective_value is computed from the configured label map +- Outcome summaries and objective breakdowns reflect delivered value on the root issue labels + +This is the **only way** to measure what business value was created, because PRs don't carry the semantic meaning — issues do. + +## Future Portfolio Reporting: Measuring What Value Was Created + +### The Question It Answers + +Instead of: **"How many PRs did we merge?"** (25 PRs, so what?) 
+ +This asks: **"How much business value did we deliver?"** (We aimed for 1000 impact points on our critical objectives, delivered 750, 75% success rate) + +### Impact Metrics + +Each objective (label assigned to root issues/epics by PM) shows: + +| Metric | Meaning | Example | +|--------|---------|---------| +| **Attempted** | Work started toward this objective | 20 PRs addressing `epic-auth` | +| **Accepted** | Work successfully delivered | 15 of 20 PRs merged | +| **Total Impact** | Value we tried to deliver | 20 attempts × 500 points = 10000 | +| **Delivered Impact** | Value we actually delivered | 15 successes × 500 points = 7500 | +| **Efficiency** | Percentage of value achieved | 7500 / 10000 = 75% ✅ Good progress | + +### Objective Breakdown Metrics + +Each objective is aggregated with these metrics: + +| Field | Type | Meaning | +|-------|------|---------| +| `label` | string | Business objective (e.g., `epic-auth`, `critical`) | +| `attempted` | int | Total work started on this objective | +| `accepted` | int | Work successfully delivered | +| `rejected` | int | Work that failed or was rejected | +| `pending` | int | Work still in progress | +| `total_objective_value` | int | Impact value attempted (sum of all values) | +| `accepted_objective_value` | int | Impact value delivered (sum of accepted values) | +| `objective_efficiency` | float64 | accepted_objective_value / total_objective_value (fraction of planned value realized, 0–1) | +| `acceptance_rate` | float64 | accepted / attempted (fraction of work that succeeded, 0–1) | + +### Example: Portfolio Impact Report + +```json +{ + "total": 50, + "accepted": 35, + "objective_efficiency": 0.75, + "domain_breakdowns": [ + { + "label": "epic-auth", + "attempted": 20, + "accepted": 15, + "rejected": 4, + "pending": 1, + "total_objective_value": 10000, + "accepted_objective_value": 7500, + "objective_efficiency": 0.75, + "acceptance_rate": 0.75 + }, + { + "label": "critical", + "attempted": 15, + "accepted": 14, + "rejected": 1, + "pending": 0, +
"total_objective_value": 1500, + "accepted_objective_value": 1400, + "objective_efficiency": 0.93, + "acceptance_rate": 0.93 + }, + { + "label": "p1", + "attempted": 15, + "accepted": 6, + "rejected": 8, + "pending": 1, + "total_objective_value": 750, + "accepted_objective_value": 300, + "objective_efficiency": 0.40, + "acceptance_rate": 0.40 + } + ] +} +``` + +### What This Report Says + +- **Epic-auth**: Aimed for 10,000 impact on this initiative, delivered 7,500 (75% success) → Continue but monitor +- **Critical**: Aimed for 1,500 impact, delivered 1,400 (93% success) → Excellent, keep strategy +- **P1**: Aimed for 750 impact, delivered only 300 (40% success) → Investigate issues, may need human review + +### Performance Analysis: Impact-Based Insights + +The `AnalyzeDomainPerformance()` function interprets efficiency to answer: **How well are we delivering on this objective?** + +| Efficiency | Status | Meaning | +|-----------|--------|---------| +| ≥ 90% | excellent | Delivering nearly all planned value → Keep strategy, scale if possible | +| ≥ 75% | good | Strong progress on objective → Monitor for regressions, maintain discipline | +| ≥ 50% | fair | Moderate success, room to improve → Review process, may need human guidance | +| < 50% | poor | Failing to deliver value → Investigate root cause, pause or redesign automation | + +**Example Interpretations:** + +- **epic-auth at 75%**: Started with 10,000 impact points planned, delivering 7,500. We're solving most auth problems successfully, but some are slipping. Review what's failing. +- **critical at 93%**: Nearly perfect on critical issues. Strategy is working. Could increase volume. +- **p1 at 40%**: Only delivering 40% of planned p1 work. Major problems here — investigate before continuing. + +## Business Impact Model + +### Key Insight: Root Tracing is Non-Negotiable + +This system only works if we trace back to root issues/epics. Here's why: + +1. 
**Root issues carry business semantics** — They're labeled by PMs with strategic intent +2. **PRs are tactical** — They're solutions, not problems; they shouldn't carry business labels +3. **GitHub's native model** — Issues represent work-to-do, PRs represent work-done +4. **Audit trail** — We can show exactly which business problems were solved + +### Example: Bad vs. Good Impact Measurement + +**Without Tracing (Bad):** +``` +Safe output: PR #456 merged +PR labels: none +Conclusion: No impact (objective_value = 0) +``` + +**With Tracing (Good):** +``` +Safe output: PR #456 merged +PR linked to: Issue #123 "Fix auth token expiry" +Issue #123 labels: epic-auth, critical +Conclusion: 500 impact points delivered (max of epic-auth = 500 and critical = 100 under "max" logic) +Portfolio: "We delivered a critical fix to the auth epic" +``` + +## Data: How Objectives Should Be Assigned + +PMs assign objectives by labeling root issues/epics. Examples: + +| Root Object | Labels | Impact | Meaning | +|---|---|---|---| +| Issue #1234 | `epic-auth` | 500 | Work toward major auth initiative | +| Issue #5678 | `critical` | 100 | Must-fix bug blocking users | +| Issue #9012 | `p1` | 50 | Important enhancement | +| Epic "Modernize API" | `initiative-api-v2` | 1000 | Major multi-quarter initiative | + +When a PR closes one of these issues, it inherits the impact value. 
+ +## API & Functions + +### ObjectiveMapping + +```go +type ObjectiveMapping struct { + LabelToValue map[string]int `json:"label_to_value"` + MultiLabelLogic string `json:"multi_label_logic"` + PriorityLabels []string `json:"priority_labels"` +} + +// Compute value from labels using configured strategy +func (om *ObjectiveMapping) ComputeObjectiveValue(labels []string) int + +// Get objective labels (mapped labels only) +func (om *ObjectiveMapping) GetObjectiveLabels(labels []string) []string + +// Load from config file, env var, or defaults +func LoadObjectiveMappingFromConfig() *ObjectiveMapping + +// Get built-in defaults +func DefaultObjectiveMapping() *ObjectiveMapping +``` + +### Objective Breakdown + +```go +type DomainBreakdown struct { + Label string `json:"label"` // Objective label (e.g., "epic-auth") + Attempted int `json:"attempted"` // Count of work toward this objective + Accepted int `json:"accepted"` // Count successfully delivered + Rejected int `json:"rejected"` // Count that failed + Pending int `json:"pending"` // Count in progress + TotalObjectiveValue int `json:"total_objective_value"` // Impact attempted + AcceptedObjectiveValue int `json:"accepted_objective_value"` // Impact delivered + ObjectiveEfficiency float64 `json:"objective_efficiency"` // Efficiency % + AcceptanceRate float64 `json:"acceptance_rate"` // Success % +} + +// Aggregate outcomes by objective label +func ComputeDomainBreakdowns(reports []OutcomeReport) []DomainBreakdown + +// Generate strategic insight +func AnalyzeDomainPerformance(breakdown DomainBreakdown) DomainInsight +``` + +## Testing + +### Unit Tests + +The `label_objective_mapping_test.go` covers: + +- Max/sum/first combination logics with multiple label scenarios +- Case insensitivity and whitespace trimming +- Nil and empty slice handling +- Combined label computation +- Real-world scenarios (e.g., `[bug, p0]`, `[performance, workflow]`) + +All tests must pass before deployment: + +```bash +go test 
./pkg/github -run TestObjectiveMapping +go test ./pkg/cli -run "TestComputeOutcomeSummary|TestEvaluateOutcomes" +``` + +### Current Integration Tests + +Current tests verify: + +1. Objective values are computed with the configured combination strategy +2. Pull request outcomes trace to closing issues before label evaluation +3. Direct issue-label fallback works when PR tracing is unavailable +4. Objective summaries and breakdowns aggregate correctly +5. Audit trail (`traced_root_url`) is recorded correctly + +### Future Integration Tests + +Additional end-to-end testing should verify: + +1. Root tracing correctly follows PR → issue → epic links once epic support exists +2. Labels are fetched from final root objects, not from intermediate artifacts +3. Cost-aware efficiency metrics are calculated accurately when AI Credits data is available + +## Extended Performance Considerations + +1. **Mapping Loaded Once** — At CLI startup, reused for all outcomes +2. **GitHub API Calls** — One call per outcome to fetch labels (async batch recommended) +3. **Aggregation** — O(n) scan of outcomes to compute domains +4. **Memory** — Domain map is O(unique labels), typically < 100 entries + +## Error Handling + +| Error | Behavior | Recovery | +|-------|----------|----------| +| Missing config file | Use defaults | Application continues | +| Invalid JSON in config | Use defaults, log error | Application continues | +| GitHub API 404 | Skip enrichment, value = 0 | Outcome evaluates normally | +| GitHub API 5xx | Log error, skip enrichment | Retry on next evaluation cycle | +| Invalid label in config | Ignored | Mapping continues with valid labels | + +## Future Extensions + +1. **Batch Root Tracing** — Async batch fetching of root issues to reduce GitHub API calls and avoid hitting rate limits +2. **Impact Trends** — Track efficiency trends over time (e.g., "epic-auth efficiency improved from 60% to 75%") +3. **Multi-Issue Links** — Handle PRs linked to multiple issues with different objectives +4. 
**Epic Hierarchies** — Support nested epics (epic → parent epic → get labels from both) +5. **Workflowized Portfolio Reports** — Dedicated reporting workflows built on the existing objective-enriched outcome data +6. **AI Credits Integration** — Factor in real run-cost data for value-per-credit reporting +7. **Predictive Efficiency** — Use historical efficiency to forecast likely delivery rate + +## References + +- [Safe Output Outcome Evaluation Specification](./safe-output-outcome-evaluation.md) +- [AI Credits Specification](../docs/src/content/docs/specs/ai-credits-specification.md) +- Implementation: `pkg/github/label_objective_mapping.go`, `pkg/cli/outcome_domain_breakdown.go` From ecfa1638285069a87f1e2d3964fb5bd8779af5be Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com> Date: Wed, 10 Jun 2026 08:45:54 +0200 Subject: [PATCH 2/7] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/cli/outcomes_history_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/cli/outcomes_history_test.go b/pkg/cli/outcomes_history_test.go index 0f6c88afa86..bd75161112c 100644 --- a/pkg/cli/outcomes_history_test.go +++ b/pkg/cli/outcomes_history_test.go @@ -41,9 +41,8 @@ func TestBuildHistoricalObjectiveReport(t *testing.T) { URL: "https://example.com/1", ClosedAt: "2026-06-01T00:00:00Z", Labels: []struct { - Name string "json:\"name\"" + Name string `json:"name"` }{{Name: "automation"}}, - }, { Number: 2, Title: "Observability with testing", From 9673df8bd991d1f983ecb70b79222b5e1607f8ee Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com> Date: Wed, 10 Jun 2026 08:46:11 +0200 Subject: [PATCH 3/7] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/cli/outcomes_history_test.go | 3 +-- 1 file changed, 1 
insertion(+), 2 deletions(-) diff --git a/pkg/cli/outcomes_history_test.go b/pkg/cli/outcomes_history_test.go index bd75161112c..ff42b643f30 100644 --- a/pkg/cli/outcomes_history_test.go +++ b/pkg/cli/outcomes_history_test.go @@ -49,9 +49,8 @@ func TestBuildHistoricalObjectiveReport(t *testing.T) { URL: "https://example.com/2", ClosedAt: "2026-06-02T00:00:00Z", Labels: []struct { - Name string "json:\"name\"" + Name string `json:"name"` }{{Name: "observability"}, {Name: "testing"}}, - }, { Number: 3, Title: "No mapped labels", From 7254f521661b796fe83a9323842348724fcf77e9 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com> Date: Wed, 10 Jun 2026 08:46:25 +0200 Subject: [PATCH 4/7] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/cli/outcomes_history_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/cli/outcomes_history_test.go b/pkg/cli/outcomes_history_test.go index ff42b643f30..249acb136d6 100644 --- a/pkg/cli/outcomes_history_test.go +++ b/pkg/cli/outcomes_history_test.go @@ -57,9 +57,8 @@ func TestBuildHistoricalObjectiveReport(t *testing.T) { URL: "https://example.com/3", ClosedAt: "2026-06-03T00:00:00Z", Labels: []struct { - Name string "json:\"name\"" + Name string `json:"name"` }{{Name: "docs"}}, - }, } report := buildHistoricalObjectiveReport(historySourceIssues, items, mapping) From 0abeaffe5b6d025849cc6c498f0b7d10ec04e47d Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com> Date: Wed, 10 Jun 2026 08:46:51 +0200 Subject: [PATCH 5/7] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/github/label_objective_mapping_constants.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/github/label_objective_mapping_constants.go 
b/pkg/github/label_objective_mapping_constants.go index dfb0f66cac1..9a5eb250a40 100644 --- a/pkg/github/label_objective_mapping_constants.go +++ b/pkg/github/label_objective_mapping_constants.go @@ -12,8 +12,8 @@ package github // // To customize these mappings: // 1. Create .github/objective-mapping.json in your repository root -// 2. Set OBJECTIVE_MAPPING_JSON environment variable -// 3. See docs/label-objective-mapping.md for configuration guide +// 2. Set OBJECTIVE_MAPPING_JSON environment variable (JSON string or file path) +// 3. See specs/objective-mapping-portfolio-reporting.md for configuration details // // Critical Priority Labels const ( From a2ec98802c37486688fb0734ac62216669eec759 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com> Date: Wed, 10 Jun 2026 08:47:13 +0200 Subject: [PATCH 6/7] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/github/label_objective_mapping.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/github/label_objective_mapping.go b/pkg/github/label_objective_mapping.go index 8e9fbab7c78..589a98dd2ee 100644 --- a/pkg/github/label_objective_mapping.go +++ b/pkg/github/label_objective_mapping.go @@ -138,10 +138,21 @@ func LoadObjectiveMappingFromConfig() *ObjectiveMapping { labelObjectiveMappingLog.Print("Attempting to load from OBJECTIVE_MAPPING_JSON env var") var om ObjectiveMapping if err := json.Unmarshal([]byte(mappingJSON), &om); err == nil { - labelObjectiveMappingLog.Printf("Loaded mapping from env var: %d labels", len(om.LabelToValue)) + labelObjectiveMappingLog.Printf("Loaded mapping from env var JSON: %d labels", len(om.LabelToValue)) return &om } else { - labelObjectiveMappingLog.Printf("Failed to parse OBJECTIVE_MAPPING_JSON: %v", err) + labelObjectiveMappingLog.Printf("Failed to parse OBJECTIVE_MAPPING_JSON as JSON: %v", err) + } + + // If it's not 
valid JSON, treat it as a file path. + if data, err := os.ReadFile(mappingJSON); err == nil { + if err := json.Unmarshal(data, &om); err == nil { + labelObjectiveMappingLog.Printf("Loaded mapping from env var file %q: %d labels", mappingJSON, len(om.LabelToValue)) + return &om + } + labelObjectiveMappingLog.Printf("Failed to parse OBJECTIVE_MAPPING_JSON file %q: %v", mappingJSON, err) + } else { + labelObjectiveMappingLog.Printf("Could not read OBJECTIVE_MAPPING_JSON as file %q: %v", mappingJSON, err) } } From 961215408ffe83f2d93186ab8c3e53627825dd45 Mon Sep 17 00:00:00 2001 From: Mara Nikola Kiefer <8320933+mnkiefer@users.noreply.github.com> Date: Wed, 10 Jun 2026 08:47:37 +0200 Subject: [PATCH 7/7] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- pkg/github/label_objective_mapping_constants.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/github/label_objective_mapping_constants.go b/pkg/github/label_objective_mapping_constants.go index 9a5eb250a40..16e256fe8de 100644 --- a/pkg/github/label_objective_mapping_constants.go +++ b/pkg/github/label_objective_mapping_constants.go @@ -1,8 +1,9 @@ package github -// DefaultObjectiveLabelValues defines the built-in label-to-value mappings -// specifically tailored for gh-aw (GitHub Agentic Workflows). +// Objective label/value constants used by the objective-mapping feature. // +// Note: keep these values in sync with DefaultObjectiveMapping (pkg/github/label_objective_mapping.go) +// and/or the repository-level .github/objective-mapping.json to avoid divergent scoring semantics. // These mappings reflect the actual work domains and priorities: // - Safety/Reliability: Safe outputs, testing, reliability = critical // - Core engine: Compilation, parsing, workflow execution = critical