diff --git a/pkg/cli/outcome_eval.go b/pkg/cli/outcome_eval.go index 707d9f3e5e3..648a4340db6 100644 --- a/pkg/cli/outcome_eval.go +++ b/pkg/cli/outcome_eval.go @@ -8,6 +8,7 @@ import ( "time" "github.com/github/gh-aw/pkg/github" + "github.com/github/gh-aw/pkg/intent" "github.com/github/gh-aw/pkg/logger" "github.com/github/gh-aw/pkg/workflow" ) @@ -37,6 +38,8 @@ type OutcomeReport struct { ObjectURL string `json:"object_url,omitempty" console:"header:URL,omitempty"` ObjectNumber int `json:"object_number,omitempty" console:"header:#,omitempty"` TracedRootURL string `json:"traced_root_url,omitempty" console:"-"` + AttributionStatus string `json:"attribution_status,omitempty" console:"-"` + AttributionSource string `json:"attribution_source,omitempty" console:"-"` Repo string `json:"repo,omitempty" console:"header:Repo,omitempty"` Result OutcomeResult `json:"result" console:"header:Outcome"` Detail string `json:"detail,omitempty" console:"header:Detail,omitempty"` @@ -402,16 +405,18 @@ func enrichOutcomeWithObjectiveValue(report *OutcomeReport, repo string, mapping outcomeEvalLog.Printf("Computing objective value: type=%s, repo=%s, number=%d", report.Type, repo, num) - root, err := traceOutcomeRoot(*report, repo) + resolvedIntent, err := resolveOutcomeIntent(*report, repo, mapping) if err != nil { outcomeEvalLog.Printf("Could not trace root for objective value computation: %v", err) return } - report.TracedRootURL = root.URL + report.AttributionStatus = string(resolvedIntent.Status) + report.AttributionSource = string(resolvedIntent.Source) + report.TracedRootURL = resolvedIntent.RootURL - labelNames := root.Labels + labelNames := resolvedIntent.Labels if len(labelNames) > 0 { - outcomeEvalLog.Printf("Fetched root labels for %s#%d: root=%s labels=%v", repo, num, root.URL, labelNames) + outcomeEvalLog.Printf("Fetched root labels for %s#%d: root=%s labels=%v", repo, num, resolvedIntent.RootURL, labelNames) } // Compute objective value @@ -423,17 +428,18 @@ func 
enrichOutcomeWithObjectiveValue(report *OutcomeReport, repo string, mapping outcomeEvalLog.Printf("Computed objective value for %s#%d: value=%d, labels=%v", repo, num, objectiveValue, objectiveLabels) } -type tracedOutcomeRoot struct { - URL string - Number int - Labels []string -} +func resolveOutcomeIntent(report OutcomeReport, repo string, mapping *github.ObjectiveMapping) (intent.IntentRecord, error) { + resolver := intent.Resolver{ + ResolverVersion: "outcome-eval-v1", + MatchLabels: func(labels []string) []string { + return mapping.GetObjectiveLabels(labels) + }, + } -func traceOutcomeRoot(report OutcomeReport, repo string) (tracedOutcomeRoot, error) { if isPullRequestOutcomeType(report.Type) { - root, err := tracePullRequestRoot(report.ObjectNumber, repo) - if err == nil && root.Number > 0 { - return root, nil + prIntent, err := resolvePullRequestIntent(report, repo, resolver) + if err == nil { + return prIntent, nil } if err != nil { outcomeEvalLog.Printf("Falling back to direct labels after PR root trace failure: %v", err) @@ -442,13 +448,9 @@ func traceOutcomeRoot(report OutcomeReport, repo string) (tracedOutcomeRoot, err labels, err := objectiveMappingGHAPIGetArray(fmt.Sprintf("issues/%d/labels", report.ObjectNumber), repo) if err != nil { - return tracedOutcomeRoot{}, err + return intent.IntentRecord{}, err } - return tracedOutcomeRoot{ - URL: report.ObjectURL, - Number: report.ObjectNumber, - Labels: labelsToStringsFromMaps(labels), - }, nil + return resolver.ResolveIssue("", report.ObjectURL, labelsToStringsFromMaps(labels)), nil } func isPullRequestOutcomeType(outcomeType string) bool { @@ -462,18 +464,29 @@ func isPullRequestOutcomeType(outcomeType string) bool { } } -func tracePullRequestRoot(prNumber int, repo string) (tracedOutcomeRoot, error) { +func resolvePullRequestIntent(report OutcomeReport, repo string, resolver intent.Resolver) (intent.IntentRecord, error) { + prData, err := loadPullRequestIntentData(report, repo) + if err != nil { + 
return intent.IntentRecord{}, err + } + return resolver.ResolvePullRequest(prData), nil +} + +func loadPullRequestIntentData(report OutcomeReport, repo string) (intent.PullRequestData, error) { + prNumber := report.ObjectNumber ownerRepo, _ := normalizeRepoForAPI(repo) owner, name, found := strings.Cut(ownerRepo, "/") if !found || owner == "" || name == "" { - return tracedOutcomeRoot{}, fmt.Errorf("invalid repo for root tracing: %s", repo) + return intent.PullRequestData{}, fmt.Errorf("invalid repo for root tracing: %s", repo) } query := fmt.Sprintf(`query { repository(owner: "%s", name: "%s") { pullRequest(number: %d) { + id closingIssuesReferences(first: 10) { nodes { + id number url labels(first: 20) { @@ -491,30 +504,45 @@ func tracePullRequestRoot(prNumber int, repo string) (tracedOutcomeRoot, error) result, err := objectiveMappingGHAPIGraphQL(query, repo) if err != nil { - return tracedOutcomeRoot{}, err + return intent.PullRequestData{}, err } data, _ := result["data"].(map[string]any) repository, _ := data["repository"].(map[string]any) pullRequest, _ := repository["pullRequest"].(map[string]any) + prData := intent.PullRequestData{URL: report.ObjectURL} + if nodeID, ok := pullRequest["id"].(string); ok { + prData.NodeID = nodeID + } closingRefs, _ := pullRequest["closingIssuesReferences"].(map[string]any) nodes, _ := closingRefs["nodes"].([]any) if len(nodes) == 0 { - return tracedOutcomeRoot{}, fmt.Errorf("no closing issues found for PR #%d", prNumber) - } - firstNode, _ := nodes[0].(map[string]any) - root := tracedOutcomeRoot{} - if url, ok := firstNode["url"].(string); ok { - root.URL = url - } - if number, ok := firstNode["number"].(float64); ok { - root.Number = int(number) + labels, labelErr := objectiveMappingGHAPIGetArray(fmt.Sprintf("issues/%d/labels", report.ObjectNumber), repo) + if labelErr != nil { + return intent.PullRequestData{}, labelErr + } + prData.Labels = labelsToStringsFromMaps(labels) + return prData, nil } - if labels, ok := 
firstNode["labels"].(map[string]any); ok { - if labelNodes, ok := labels["nodes"].([]any); ok { - root.Labels = labelsToStringsFromNodes(labelNodes) + + prData.ClosingIssues = make([]intent.RootReference, 0, len(nodes)) + for _, node := range nodes { + rootNode, _ := node.(map[string]any) + root := intent.RootReference{Type: "issue"} + if nodeID, ok := rootNode["id"].(string); ok { + root.NodeID = nodeID } + if url, ok := rootNode["url"].(string); ok { + root.URL = url + } + if labels, ok := rootNode["labels"].(map[string]any); ok { + if labelNodes, ok := labels["nodes"].([]any); ok { + root.Labels = labelsToStringsFromNodes(labelNodes) + } + } + prData.ClosingIssues = append(prData.ClosingIssues, root) } - return root, nil + + return prData, nil } func labelsToStringsFromNodes(nodes []any) []string { diff --git a/pkg/cli/outcome_eval_test.go b/pkg/cli/outcome_eval_test.go index 7d6c7217f97..c89b88b1a69 100644 --- a/pkg/cli/outcome_eval_test.go +++ b/pkg/cli/outcome_eval_test.go @@ -220,9 +220,11 @@ func TestEnrichOutcomeWithObjectiveValue_TracesPullRequestToRootIssue(t *testing "data": map[string]any{ "repository": map[string]any{ "pullRequest": map[string]any{ + "id": "PR_kwDOAAABCD4", "closingIssuesReferences": map[string]any{ "nodes": []any{ map[string]any{ + "id": "I_kwDOAAABCQ4", "number": float64(1234), "url": "https://github.com/owner/repo/issues/1234", "labels": map[string]any{"nodes": []any{ @@ -253,6 +255,8 @@ func TestEnrichOutcomeWithObjectiveValue_TracesPullRequestToRootIssue(t *testing assert.Equal(t, 90, report.ObjectiveValue) assert.Equal(t, []string{"agentic-campaign", "security"}, report.ObjectiveLabels) assert.Equal(t, "https://github.com/owner/repo/issues/1234", report.TracedRootURL) + assert.Equal(t, "mapped", report.AttributionStatus) + assert.Equal(t, "closing_issue", report.AttributionSource) } func TestEnrichOutcomeWithObjectiveValue_FallsBackToDirectLabels(t *testing.T) { @@ -278,6 +282,64 @@ func 
TestEnrichOutcomeWithObjectiveValue_FallsBackToDirectLabels(t *testing.T) { assert.Equal(t, 70, report.ObjectiveValue) assert.Equal(t, []string{"automation", "testing"}, report.ObjectiveLabels) assert.Equal(t, "https://github.com/owner/repo/issues/42", report.TracedRootURL) + assert.Equal(t, "mapped", report.AttributionStatus) + assert.Equal(t, "issue_labels", report.AttributionSource) +} + +func TestEnrichOutcomeWithObjectiveValue_MultipleClosingIssuesRemainAmbiguous(t *testing.T) { + oldGraphQL := objectiveMappingGHAPIGraphQL + oldGetArray := objectiveMappingGHAPIGetArray + t.Cleanup(func() { + objectiveMappingGHAPIGraphQL = oldGraphQL + objectiveMappingGHAPIGetArray = oldGetArray + }) + + objectiveMappingGHAPIGraphQL = func(query string, repo string) (map[string]any, error) { + return map[string]any{ + "data": map[string]any{ + "repository": map[string]any{ + "pullRequest": map[string]any{ + "id": "PR_kwDOAAABCD4", + "closingIssuesReferences": map[string]any{ + "nodes": []any{ + map[string]any{ + "id": "I_kwDOAAABCQ4", + "url": "https://github.com/owner/repo/issues/1234", + "labels": map[string]any{"nodes": []any{ + map[string]any{"name": "agentic-campaign"}, + }}, + }, + map[string]any{ + "id": "I_kwDOAAABCR4", + "url": "https://github.com/owner/repo/issues/1235", + "labels": map[string]any{"nodes": []any{ + map[string]any{"name": "security"}, + }}, + }, + }, + }, + }, + }, + }, + }, nil + } + objectiveMappingGHAPIGetArray = func(endpoint string, repo string) ([]map[string]any, error) { + return []map[string]any{{"name": "automation"}}, nil + } + + report := OutcomeReport{Type: "create_pull_request", ObjectURL: "https://github.com/owner/repo/pull/77", ObjectNumber: 77} + mapping := &github.ObjectiveMapping{ + LabelToValue: map[string]int{"agentic-campaign": 90, "security": 85, "automation": 70}, + MultiLabelLogic: "max", + } + + enrichOutcomeWithObjectiveValue(&report, "owner/repo", mapping) + + assert.Equal(t, "ambiguous", report.AttributionStatus) + 
assert.Equal(t, "closing_issue", report.AttributionSource) + assert.Empty(t, report.TracedRootURL) + assert.Zero(t, report.ObjectiveValue) + assert.Empty(t, report.ObjectiveLabels) } func TestNormalizeOutcomeEvaluationTargetExistsOnly(t *testing.T) { diff --git a/pkg/intent/resolver.go b/pkg/intent/resolver.go new file mode 100644 index 00000000000..53005a5bccc --- /dev/null +++ b/pkg/intent/resolver.go @@ -0,0 +1,164 @@ +package intent + +type AttributionStatus string + +const ( + AttributionMapped AttributionStatus = "mapped" + AttributionUnmapped AttributionStatus = "unmapped" + AttributionUnlinked AttributionStatus = "unlinked" + AttributionAmbiguous AttributionStatus = "ambiguous" + AttributionSuggested AttributionStatus = "suggested" +) + +type AttributionSource string + +const ( + SourceExplicitMetadata AttributionSource = "explicit_metadata" + SourceClosingIssue AttributionSource = "closing_issue" + SourceParentIssue AttributionSource = "parent_issue" + SourceReferencedIssue AttributionSource = "referenced_issue" + SourceProject AttributionSource = "project" + SourceMilestone AttributionSource = "milestone" + SourceIssueLabels AttributionSource = "issue_labels" + SourceArtifactLabels AttributionSource = "artifact_labels" + SourceSuggestion AttributionSource = "suggestion" + SourceNone AttributionSource = "none" +) + +type IntentRecord struct { + Status AttributionStatus `json:"status"` + Source AttributionSource `json:"source"` + + RootNodeID string `json:"root_node_id,omitempty"` + RootType string `json:"root_type,omitempty"` + RootURL string `json:"root_url,omitempty"` + + Labels []string `json:"labels,omitempty"` + + Rule string `json:"rule,omitempty"` + ResolverVersion string `json:"resolver_version,omitempty"` +} + +type RootReference struct { + NodeID string + Type string + URL string + Labels []string +} + +type PullRequestData struct { + NodeID string + URL string + Labels []string + ExplicitIntent *IntentRecord + ClosingIssues []RootReference +} + 
+type Resolver struct { + ResolverVersion string + MatchLabels func(labels []string) []string +} + +func (r Resolver) ResolvePullRequest(pr PullRequestData) IntentRecord { + if pr.ExplicitIntent != nil { + intent := *pr.ExplicitIntent + if intent.ResolverVersion == "" { + intent.ResolverVersion = r.ResolverVersion + } + return intent + } + + switch len(pr.ClosingIssues) { + case 1: + return r.fromRoot(pr.ClosingIssues[0], SourceClosingIssue, "single_closing_issue") + case 0: + if len(pr.Labels) > 0 { + return r.fromLabels(pr.NodeID, pr.URL, pr.Labels, SourceArtifactLabels, "pull_request_label_fallback") + } + return r.unlinked("no_supported_intent_source") + default: + return r.ambiguous(SourceClosingIssue, "multiple_closing_issues") + } +} + +func (r Resolver) ResolveIssue(nodeID, url string, labels []string) IntentRecord { + if len(labels) == 0 { + return r.unlinked("no_supported_intent_source") + } + return IntentRecord{ + Status: r.statusForLabels(labels), + Source: SourceIssueLabels, + RootNodeID: nodeID, + RootType: "issue", + RootURL: url, + Labels: cloneStrings(labels), + Rule: "issue_label_fallback", + ResolverVersion: r.ResolverVersion, + } +} + +func (r Resolver) fromRoot(root RootReference, source AttributionSource, rule string) IntentRecord { + return IntentRecord{ + Status: r.statusForLabels(root.Labels), + Source: source, + RootNodeID: root.NodeID, + RootType: root.Type, + RootURL: root.URL, + Labels: cloneStrings(root.Labels), + Rule: rule, + ResolverVersion: r.ResolverVersion, + } +} + +func (r Resolver) fromLabels(nodeID, url string, labels []string, source AttributionSource, rule string) IntentRecord { + return IntentRecord{ + Status: r.statusForLabels(labels), + Source: source, + RootNodeID: nodeID, + RootType: "artifact", + RootURL: url, + Labels: cloneStrings(labels), + Rule: rule, + ResolverVersion: r.ResolverVersion, + } +} + +func (r Resolver) unlinked(rule string) IntentRecord { + return IntentRecord{ + Status: AttributionUnlinked, + 
Source: SourceNone, + Rule: rule, + ResolverVersion: r.ResolverVersion, + } +} + +func (r Resolver) ambiguous(source AttributionSource, rule string) IntentRecord { + return IntentRecord{ + Status: AttributionAmbiguous, + Source: source, + Rule: rule, + ResolverVersion: r.ResolverVersion, + } +} + +func (r Resolver) statusForLabels(labels []string) AttributionStatus { + if len(labels) == 0 { + return AttributionUnlinked + } + if r.MatchLabels == nil { + return AttributionUnmapped + } + if len(r.MatchLabels(labels)) > 0 { + return AttributionMapped + } + return AttributionUnmapped +} + +func cloneStrings(values []string) []string { + if len(values) == 0 { + return nil + } + cloned := make([]string, len(values)) + copy(cloned, values) + return cloned +} diff --git a/pkg/intent/resolver_test.go b/pkg/intent/resolver_test.go new file mode 100644 index 00000000000..307f4070978 --- /dev/null +++ b/pkg/intent/resolver_test.go @@ -0,0 +1,118 @@ +package intent + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestResolverResolvePullRequestSingleClosingIssueMapped(t *testing.T) { + resolver := Resolver{ + ResolverVersion: "test-v1", + MatchLabels: func(labels []string) []string { + if len(labels) == 0 { + return nil + } + return []string{"security"} + }, + } + + intent := resolver.ResolvePullRequest(PullRequestData{ + NodeID: "PR_kwDOAAABCD4", + URL: "https://github.com/owner/repo/pull/77", + ClosingIssues: []RootReference{{ + NodeID: "I_kwDOAAABCQ4", + Type: "issue", + URL: "https://github.com/owner/repo/issues/1234", + Labels: []string{"security", "critical"}, + }}, + }) + + assert.Equal(t, AttributionMapped, intent.Status) + assert.Equal(t, SourceClosingIssue, intent.Source) + assert.Equal(t, "I_kwDOAAABCQ4", intent.RootNodeID) + assert.Equal(t, "issue", intent.RootType) + assert.Equal(t, "https://github.com/owner/repo/issues/1234", intent.RootURL) + assert.Equal(t, []string{"security", "critical"}, intent.Labels) + assert.Equal(t, 
"single_closing_issue", intent.Rule) + assert.Equal(t, "test-v1", intent.ResolverVersion) +} + +func TestResolverResolvePullRequestSingleClosingIssueUnmapped(t *testing.T) { + resolver := Resolver{ + MatchLabels: func(labels []string) []string { + return nil + }, + } + + intent := resolver.ResolvePullRequest(PullRequestData{ + ClosingIssues: []RootReference{{ + Type: "issue", + URL: "https://github.com/owner/repo/issues/1234", + Labels: []string{"triage"}, + }}, + }) + + assert.Equal(t, AttributionUnmapped, intent.Status) + assert.Equal(t, SourceClosingIssue, intent.Source) + assert.Equal(t, "single_closing_issue", intent.Rule) +} + +func TestResolverResolvePullRequestArtifactFallbackMapped(t *testing.T) { + resolver := Resolver{ + MatchLabels: func(labels []string) []string { + return []string{"automation"} + }, + } + + intent := resolver.ResolvePullRequest(PullRequestData{ + NodeID: "PR_kwDOAAABCD4", + URL: "https://github.com/owner/repo/pull/77", + Labels: []string{"automation"}, + }) + + assert.Equal(t, AttributionMapped, intent.Status) + assert.Equal(t, SourceArtifactLabels, intent.Source) + assert.Equal(t, "pull_request_label_fallback", intent.Rule) + assert.Equal(t, "artifact", intent.RootType) + assert.Equal(t, "https://github.com/owner/repo/pull/77", intent.RootURL) +} + +func TestResolverResolvePullRequestNoSourcesUnlinked(t *testing.T) { + resolver := Resolver{} + + intent := resolver.ResolvePullRequest(PullRequestData{}) + + assert.Equal(t, AttributionUnlinked, intent.Status) + assert.Equal(t, SourceNone, intent.Source) + assert.Equal(t, "no_supported_intent_source", intent.Rule) +} + +func TestResolverResolvePullRequestMultipleClosingIssuesAmbiguous(t *testing.T) { + resolver := Resolver{} + + intent := resolver.ResolvePullRequest(PullRequestData{ + ClosingIssues: []RootReference{{URL: "https://github.com/owner/repo/issues/1"}, {URL: "https://github.com/owner/repo/issues/2"}}, + }) + + assert.Equal(t, AttributionAmbiguous, intent.Status) + 
assert.Equal(t, SourceClosingIssue, intent.Source) + assert.Equal(t, "multiple_closing_issues", intent.Rule) + assert.Empty(t, intent.RootURL) +} + +func TestResolverResolveIssueMapped(t *testing.T) { + resolver := Resolver{ + MatchLabels: func(labels []string) []string { + return []string{"documentation"} + }, + } + + intent := resolver.ResolveIssue("I_kwDOAAABCQ4", "https://github.com/owner/repo/issues/42", []string{"documentation"}) + + assert.Equal(t, AttributionMapped, intent.Status) + assert.Equal(t, SourceIssueLabels, intent.Source) + assert.Equal(t, "issue_label_fallback", intent.Rule) + assert.Equal(t, "issue", intent.RootType) + assert.Equal(t, []string{"documentation"}, intent.Labels) +} diff --git a/specs/intent-attribution-agent-governance.md b/specs/intent-attribution-agent-governance.md new file mode 100644 index 00000000000..0095a785d94 --- /dev/null +++ b/specs/intent-attribution-agent-governance.md @@ -0,0 +1,1218 @@ +--- + +title: Intent Attribution & Agent Governance Specification +version: 2.0.0 +status: Partially Implemented +date: 2026-06-09 +last_updated: 2026-06-12 +replaces: objective-mapping-portfolio-reporting.md +--- + +# Intent Attribution & Agent Governance Specification + +## Summary + +This specification defines a deterministic intent layer for agentic GitHub workflows. + +The system connects GitHub work to structured context such as: + +* priority +* domain +* initiative +* risk +* root issue + +That context can be used for two purposes: + +1. **Attribution and reporting** — explain what work an artifact supported and how reliably that relationship is known +2. **Workflow governance** — determine what an agent may do, which checks are required, and whether human approval is necessary + +The central principle is: + +> **Intent determines authority. Execution produces evidence.** + +The system does not claim that GitHub labels or merged pull requests prove business impact, ROI, or realized customer value. 
+ +## Current implementation + +The existing implementation provides the initial attribution and reporting foundation: + +1. A shared GitHub utility loads `.github/objective-mapping.json` +2. Labels are mapped to numeric weights through `ObjectiveMapping` +3. CLI outcome reports include: + + * `objective_value` + * `objective_labels` + * `traced_root_url` +4. Pull request outcomes trace to closing issues before labels are evaluated +5. Direct issue labels are used as a fallback +6. Outcome summaries aggregate attempted and accepted weights +7. Per-label breakdowns aggregate accepted, rejected, and pending outcomes + +These capabilities remain supported. + +In this specification, they are treated as an early implementation of **intent attribution**, not as proof of business impact. + +## Product boundary + +The system can establish: + +* what GitHub artifact was produced +* whether the artifact was accepted, rejected, or remains pending +* which explicit GitHub relationship connected it to a root object +* which configured labels were found +* which deterministic rule was applied +* which workflow policy should govern the work +* which checks and approvals were required +* which execution evidence was recorded + +The system does not independently establish: + +* financial value +* ROI +* employee productivity +* customer value +* strategic success +* causal business impact + +Those claims require separate evidence. + +## Core model + +```text +Intent + ↓ +Attribution + ↓ +Risk classification + ↓ +Policy compilation + ↓ +Agent execution + ↓ +GitHub artifact + ↓ +Outcome evaluation + ↓ +Evidence +``` + +The model distinguishes five concepts. + +### Intent + +Why the work exists and what context applies. + +Examples: + +* critical +* security +* authentication modernization +* documentation +* production incident + +### Attribution + +How the intent was connected to the work. 
+ +Examples: + +* explicit workflow metadata +* closing issue +* parent issue +* direct issue labels +* pull request labels + +### Policy + +What the agent is permitted and required to do. + +Examples: + +* propose only +* supervised execution +* bounded autonomous execution +* required security tests +* mandatory human approval +* auto-merge prohibited + +### Outcome + +What observable GitHub state occurred. + +Examples: + +* pull request merged +* pull request closed without merge +* issue completed +* work still pending + +### Evidence + +What proves that the required process occurred. + +Examples: + +* test results +* review approval +* policy decision +* workflow trace +* GitHub artifact state + +## Design principles + +### Deterministic authority + +Official attribution and workflow authorization must be derived from: + +* explicit GitHub metadata +* repository or organization configuration +* deterministic precedence rules + +LLMs may propose classifications or relationships, but suggestions must not affect official authorization or reporting until confirmed. + +### Unknown is not zero + +Missing attribution must not be represented as zero importance. + +```json +{ + "status": "unlinked", + "weight": null +} +``` + +A zero value means an explicit configured value of zero. + +A null value means unknown or unavailable. + +### Fail closed + +Unknown, invalid, or ambiguous intent must result in the safest applicable workflow policy. + +### Provenance is required + +Every attribution and policy decision must explain: + +* the source +* the rule +* the root object +* the configuration version +* any overrides + +### Artifacts are not objectives + +A pull request is an execution artifact. + +An issue or explicitly declared objective represents intended work. + +Multiple pull requests connected to one issue must not automatically multiply the number of completed objectives. 
+ +## Intent configuration + +The initial implementation continues to support: + +```text +.github/objective-mapping.json +``` + +A future migration may introduce: + +```text +.github/intent-policy.json +``` + +### Compatibility configuration + +```json +{ + "label_to_value": { + "critical": 100, + "p0": 100, + "p1": 50, + "security": 40 + }, + "multi_label_logic": "max", + "priority_labels": [ + "critical", + "p0", + "p1" + ] +} +``` + +Existing numeric values are interpreted as **relative weights**, not financial value or verified impact. + +### Target configuration + +```json +{ + "version": 1, + "labels": { + "critical": { + "dimension": "priority", + "value": "critical", + "weight": 100 + }, + "p1": { + "dimension": "priority", + "value": "high", + "weight": 50 + }, + "security": { + "dimension": "domain", + "value": "security" + }, + "documentation": { + "dimension": "domain", + "value": "documentation" + }, + "high-risk": { + "dimension": "risk", + "value": "high" + }, + "auth-modernization": { + "dimension": "initiative", + "value": "auth-modernization" + } + }, + "scoring": { + "dimension": "priority", + "strategy": "max" + }, + "attribution": { + "multiple_roots": "ambiguous", + "allow_artifact_label_fallback": true + } +} +``` + +Separating dimensions prevents initiatives, priorities, domains, and risk labels from competing in one flat scoring calculation. + +Only the configured scoring dimension contributes to weight. 
+ +## Intent record + +```go +type IntentRecord struct { + Status AttributionStatus `json:"status"` + Source AttributionSource `json:"source"` + + Objective string `json:"objective,omitempty"` + Initiative string `json:"initiative,omitempty"` + Priority string `json:"priority,omitempty"` + Domains []string `json:"domains,omitempty"` + Risk string `json:"risk,omitempty"` + + RootNodeID string `json:"root_node_id,omitempty"` + RootType string `json:"root_type,omitempty"` + RootURL string `json:"root_url,omitempty"` + + Labels []string `json:"labels,omitempty"` + Weight *int `json:"weight"` + + Rule string `json:"rule"` + ConfigHash string `json:"config_hash"` + ResolverVersion string `json:"resolver_version"` +} +``` + +## Attribution states + +```go +type AttributionStatus string + +const ( + AttributionMapped AttributionStatus = "mapped" + AttributionUnmapped AttributionStatus = "unmapped" + AttributionUnlinked AttributionStatus = "unlinked" + AttributionAmbiguous AttributionStatus = "ambiguous" + AttributionSuggested AttributionStatus = "suggested" +) +``` + +### `mapped` + +A deterministic source was found and at least one configured intent label matched. + +### `unmapped` + +A root object was found, but its labels did not match the configuration. + +### `unlinked` + +No supported root or intent source was found. + +### `ambiguous` + +Multiple candidate roots were found and no deterministic policy selected one. + +### `suggested` + +A heuristic or AI-generated relationship exists but has not been confirmed. + +Suggested attribution does not contribute to official metrics or policy decisions. 
+ +## Attribution sources + +```go +type AttributionSource string + +const ( + SourceExplicitMetadata AttributionSource = "explicit_metadata" + SourceClosingIssue AttributionSource = "closing_issue" + SourceParentIssue AttributionSource = "parent_issue" + SourceReferencedIssue AttributionSource = "referenced_issue" + SourceProject AttributionSource = "project" + SourceMilestone AttributionSource = "milestone" + SourceIssueLabels AttributionSource = "issue_labels" + SourceArtifactLabels AttributionSource = "artifact_labels" + SourceSuggestion AttributionSource = "suggestion" + SourceNone AttributionSource = "none" +) +``` + +## Deterministic resolution + +Resolution order: + +```text +1. Explicit workflow intent +2. Single closing issue +3. Parent or sub-issue relationship +4. Explicit referenced issue +5. Project or campaign context +6. Milestone +7. Direct artifact labels +8. Suggested attribution +9. Unlinked +``` + +Initial implementation: + +```go +func (r *Resolver) Resolve(pr PullRequestData) IntentRecord { + if pr.ExplicitIntent != nil { + return r.fromExplicitIntent(pr.ExplicitIntent) + } + + switch len(pr.ClosingIssues) { + case 1: + return r.fromRoot( + pr.ClosingIssues[0], + SourceClosingIssue, + "single_closing_issue", + ) + + case 0: + if len(pr.Labels) > 0 { + return r.fromLabels( + pr.NodeID, + pr.URL, + pr.Labels, + SourceArtifactLabels, + "pull_request_label_fallback", + ) + } + + return r.unlinked("no_supported_intent_source") + + default: + return r.ambiguous( + SourceClosingIssue, + "multiple_closing_issues", + ) + } +} +``` + +The resolver must not silently select the first of multiple closing issues. + +## Multiple-root policy + +Default: + +```text +0 candidates → continue resolution +1 candidate → use candidate +2+ candidates → ambiguous +``` + +Future supported policies may include: + +* explicit primary root +* highest-priority root +* all roots +* fractional attribution + +The active policy must be recorded in decision provenance. 
+ +## Risk classification + +Risk should be explicit where possible. + +When risk is absent, deterministic rules may derive it. + +Example: + +```text +security + critical → high +production → high +infrastructure → medium +dependency update → medium +documentation → low +unknown → unknown +``` + +```go +func ResolveRisk(intent IntentRecord) string { + if intent.Risk != "" { + return intent.Risk + } + + if contains(intent.Domains, "security") && + intent.Priority == "critical" { + return "high" + } + + if contains(intent.Domains, "production") { + return "high" + } + + if contains(intent.Domains, "infrastructure") { + return "medium" + } + + if contains(intent.Domains, "documentation") { + return "low" + } + + return "unknown" +} +``` + +## Execution policy + +```go +type ExecutionPolicy struct { + Autonomy string `json:"autonomy"` + + AllowedTools []string `json:"allowed_tools"` + DeniedTools []string `json:"denied_tools"` + + WriteScope string `json:"write_scope"` + + RequiredChecks []string `json:"required_checks"` + + HumanApprovalRequired bool `json:"human_approval_required"` + AutoMergeAllowed bool `json:"auto_merge_allowed"` + + MaxAttempts int `json:"max_attempts"` + + RuleIDs []string `json:"rule_ids"` +} +``` + +Supported initial autonomy levels: + +### `propose_only` + +The agent may inspect the repository and propose a plan or patch. + +The agent may not modify the repository. + +### `supervised` + +The agent may create changes on a feature branch and open a pull request. + +Human approval is required before merge. + +### `bounded` + +The agent may complete the workflow within explicitly configured limits. + +Auto-merge may be permitted after required checks pass. + +## Policy precedence + +```text +organization constraints +> repository constraints +> intent-specific rules +> workflow defaults +> agent request +``` + +A lower-precedence rule may not weaken a higher-precedence constraint. 
+ +Example: + +```json +{ + "rules": [ + { + "id": "security-critical", + "when": { + "domain": "security", + "priority": "critical" + }, + "set": { + "autonomy": "supervised", + "write_scope": "feature_branch", + "required_checks": [ + "unit-tests", + "security-tests", + "dependency-review" + ], + "human_approval_required": true, + "auto_merge_allowed": false, + "max_attempts": 2 + } + }, + { + "id": "documentation-low-risk", + "when": { + "domain": "documentation", + "risk": "low" + }, + "set": { + "autonomy": "bounded", + "write_scope": "feature_branch", + "required_checks": [ + "documentation-build" + ], + "human_approval_required": false, + "auto_merge_allowed": true, + "max_attempts": 3 + } + }, + { + "id": "unknown-default", + "when": { + "risk": "unknown" + }, + "set": { + "autonomy": "propose_only", + "write_scope": "none", + "human_approval_required": true, + "auto_merge_allowed": false, + "max_attempts": 1 + } + } + ] +} +``` + +## Safe default + +```go +func safestDefaultPolicy() ExecutionPolicy { + return ExecutionPolicy{ + Autonomy: "propose_only", + WriteScope: "none", + HumanApprovalRequired: true, + AutoMergeAllowed: false, + MaxAttempts: 1, + } +} +``` + +Unknown or ambiguous intent must not grant elevated authority. + +## Policy compilation + +```go +type PolicyCompiler struct { + Rules []PolicyRule +} + +func (c *PolicyCompiler) Compile( + intent IntentRecord, + repository RepositoryContext, +) ExecutionPolicy { + policy := safestDefaultPolicy() + + for _, rule := range c.Rules { + if rule.Matches(intent, repository) { + policy = mergePolicy(policy, rule.Set) + policy.RuleIDs = append( + policy.RuleIDs, + rule.ID, + ) + } + } + + return policy +} +``` + +Policy merging must preserve stricter higher-precedence constraints. 
+ +## Decision provenance + +```go +type PolicyDecision struct { + Intent IntentRecord `json:"intent"` + Policy ExecutionPolicy `json:"policy"` + + AppliedRules []AppliedRule `json:"applied_rules"` + Overrides []PolicyOverride `json:"overrides"` + + ConfigHash string `json:"config_hash"` + CompilerVersion string `json:"compiler_version"` +} +``` + +Example: + +```json +{ + "policy": { + "autonomy": "supervised", + "human_approval_required": true, + "auto_merge_allowed": false + }, + "applied_rules": [ + { + "id": "security-critical", + "reason": "domain=security and priority=critical" + } + ], + "overrides": [ + { + "field": "auto_merge_allowed", + "requested": true, + "effective": false, + "reason": "organization security policy" + } + ] +} +``` + +## Enforcement + +The orchestrator must enforce the compiled policy at runtime. + +Policy must not exist only in an agent prompt. + +```go +func (o *Orchestrator) Execute( + ctx context.Context, + request WorkflowRequest, +) error { + intent := o.intentResolver.Resolve(request.WorkItem) + + intent.Risk = ResolveRisk(intent) + + policy := o.policyCompiler.Compile( + intent, + request.Repository, + ) + + if err := o.authorizer.Validate( + request, + policy, + ); err != nil { + return err + } + + runtime := NewRestrictedRuntime(policy) + + return runtime.Run(ctx, request.Workflow) +} +``` + +Individual tool calls must be authorized: + +```go +func (a *Authorizer) AuthorizeTool( + policy ExecutionPolicy, + tool string, +) error { + if slices.Contains(policy.DeniedTools, tool) { + return ErrToolDenied + } + + if !slices.Contains(policy.AllowedTools, tool) { + return ErrToolNotAllowed + } + + return nil +} +``` + +The agent must not be able to modify or expand its own policy. 
+ +## Outcome evaluation + +Initial observable rules: + +```text +merged pull request → accepted +closed unmerged pull request → rejected +open pull request → pending + +completed issue → accepted +closed as not planned → rejected +open issue → pending +``` + +```go +type OutcomeRecord struct { + ArtifactURL string `json:"artifact_url"` + Status string `json:"status"` + + Intent IntentRecord `json:"intent"` + Policy ExecutionPolicy `json:"policy"` + + EvaluatedAt time.Time `json:"evaluated_at"` +} +``` + +An accepted artifact is evidence of accepted execution. + +It does not independently prove realized business impact. + +## Evidence record + +```go +type EvidenceRecord struct { + WorkflowRunID string `json:"workflow_run_id"` + ArtifactURL string `json:"artifact_url"` + + RequiredChecks []string `json:"required_checks"` + PassedChecks []string `json:"passed_checks"` + FailedChecks []string `json:"failed_checks"` + + HumanApprovalRequired bool `json:"human_approval_required"` + HumanApprovalReceived bool `json:"human_approval_received"` + + Outcome string `json:"outcome"` + + TraceID string `json:"trace_id,omitempty"` +} +``` + +## Attribution reporting + +The existing weighted reporting remains useful when interpreted correctly. + +### Attribution coverage + +```text +mapped outcomes / all evaluated outcomes +``` + +### Acceptance rate + +```text +accepted outcomes / attempted outcomes +``` + +### Weighted acceptance rate + +```text +accepted mapped weight / attempted mapped weight +``` + +Weighted acceptance measures delivery performance across configured relative weights. + +It is not ROI, planned-value completion, or verified business impact. + +Every weighted result must be shown alongside attribution coverage. + +```text +Weighted acceptance: 78% +Attribution coverage: 42% +``` + +## Unique root reporting + +Strategic reporting must deduplicate root work items. 
+ +```text +Five merged pull requests +connected to one issue += +one attributed root +``` + +Root completion must come from the root object's state or explicit completion evidence. + +A merged pull request alone must not automatically mark the full root objective complete. + +## OpenTelemetry + +OpenTelemetry is the execution-observability layer. + +It is not the authoritative intent store. + +One trace should represent one workflow execution: + +```text +agentic.workflow.run +├── intent.resolve +├── risk.resolve +├── policy.compile +├── authorization.validate +├── agent.execute +├── github.artifact.create +├── checks.evaluate +├── approval.evaluate +└── outcome.evaluate +``` + +Recommended span attributes: + +```text +github.intent.status +github.intent.source +github.intent.priority +github.intent.risk +github.intent.domain + +agent.policy.autonomy +agent.policy.write_scope +agent.policy.human_approval_required +agent.policy.auto_merge_allowed + +github.artifact.type +github.artifact.outcome +``` + +High-cardinality identifiers belong on spans or persisted records, not metric dimensions. 
+ +Examples: + +* pull request URL +* issue URL +* node ID +* trace ID +* workflow run ID + +## Metrics + +Initial operational metrics: + +```text +agent.workflow.runs +agent.workflow.denied +agent.workflow.duration + +github.intent.resolutions +github.intent.attribution.coverage + +agent.policy.decisions +agent.policy.overrides + +github.artifacts.created +github.artifacts.accepted +github.artifacts.rejected +``` + +Useful low-cardinality dimensions: + +```text +intent.status +intent.source +priority +risk +domain +autonomy +outcome +``` + +## CLI + +### Explain policy before execution + +```bash +gh aw policy explain \ + --repo acme/platform \ + --issue 123 +``` + +Example: + +```text +Intent + Priority: critical + Domain: security + Risk: high + Source: issue labels + +Execution policy + Autonomy: supervised + Write scope: feature branch + Human approval: required + Auto-merge: prohibited + Maximum attempts: 2 + +Required checks + unit-tests + security-tests + dependency-review + +Applied rules + security-critical + organization-security-baseline +``` + +### Report outcomes + +```bash +gh aw outcomes report \ + --repo acme/platform +``` + +Example: + +```text +Outcomes + Total: 40 + Accepted: 28 + Rejected: 7 + Pending: 5 + +Intent attribution + Mapped: 17 + Unmapped: 6 + Unlinked: 14 + Ambiguous: 3 + Coverage: 42.5% + +Weighted delivery + Attempted weight: 1150 + Accepted weight: 900 + Weighted acceptance: 78.3% + +Unique attributed roots: 12 +``` + +## Implementation phases + +### Phase 1: current foundation + +Already implemented or partially implemented: + +1. Load label-to-weight mapping +2. Compute weights from labels +3. Trace pull requests to closing issues +4. Record root URL +5. Enrich outcome reports +6. Aggregate accepted and attempted weights +7. Produce per-label breakdowns + +### Phase 2: honest attribution model + +Implement: + +1. Rename value semantics to relative weight +2. Add attribution states +3. Add attribution source +4. 
Make unknown weight nullable +5. Record root node ID +6. Mark multiple roots ambiguous +7. Add attribution coverage +8. Rename objective efficiency to weighted acceptance rate +9. Deduplicate root work items +10. Separate priority, domain, initiative, and risk dimensions + +### Phase 3: prove attribution value + +Before intent affects authority, the system must show that attribution is useful for analysis. + +Validate: + +1. Attribution coverage is high enough to be decision-relevant +2. Mapped categories reveal non-trivial work patterns +3. Unique-root reporting is more informative than raw artifact counts +4. Weighted acceptance is always shown alongside coverage +5. Manual samples show that mapped results are directionally correct + +Deliver: + +1. Coverage reporting by state: mapped, unmapped, unlinked, ambiguous +2. Unique-root reporting +3. Weighted acceptance over mapped items only +4. Representative manual-review samples for correctness checking + +If this phase does not show clear analytical value, intent remains a reporting feature and does not drive policy. + +### Phase 4: prove attribution trustworthiness + +Before intent affects authority, the system must show that attribution is trustworthy enough for control. + +Validate: + +1. Identical GitHub state and configuration produce identical attribution +2. Ambiguous and unlinked cases fail closed +3. Source precedence is understandable and auditable +4. Fallback paths do not silently distort authority decisions +5. Manual validation shows high precision for cases that would change authority + +Deliver: + +1. Attribution provenance for every official decision +2. Determinism tests +3. Ambiguity and fallback reporting +4. Source-quality analysis by attribution source + +If this phase does not show sufficient trustworthiness, intent remains analytics only. + +### Phase 5: minimal intent-aware governance + +Implement: + +1. Safe default for ambiguous and unlinked intent +2. 
Minimal policy distinctions with clear operational value +3. A narrow policy explanation surface such as `gh aw policy explain` +4. Explicit fail-closed behavior when attribution is missing or ambiguous + +Suggested initial trial: + +1. Ambiguous or unlinked intent becomes `propose_only` +2. Explicitly high-risk or security-related intent becomes `supervised` +3. Low-risk documentation work may become `bounded` + +This phase should remain intentionally small. It is a controlled trial, not a full policy framework. + +### Phase 6: broader governance and execution evidence + +Implement: + +1. Risk resolution beyond simple deterministic categories +2. General policy compilation +3. Tool authorization +4. Write-scope enforcement +5. Required checks +6. Human approval requirement +7. Auto-merge restrictions +8. Decision provenance +9. OpenTelemetry workflow traces +10. Policy-decision spans +11. Authorization events +12. Evidence records +13. Low-cardinality operational metrics +14. Asynchronous trace links for later GitHub outcomes + +### Phase 7: broader intent relationships + +Potential extensions: + +1. Parent and sub-issue resolution +2. Projects and milestones +3. Cross-repository initiatives +4. Organization-level policies +5. Suggested attribution requiring confirmation +6. 
External production or customer evidence + +## Testing + +### Attribution tests + +* One mapped closing issue +* One unmapped closing issue +* No closing issue +* Pull request label fallback +* Multiple closing issues +* Null weight for unknown attribution +* Case-insensitive label normalization +* Root deduplication + +### Attribution value tests + +* Coverage is reported alongside weighted acceptance +* Unique-root counts differ from raw artifact counts when many artifacts map to one root +* Mapped-only weighted reporting excludes unlinked and ambiguous items +* Representative samples can be produced for manual correctness review + +### Attribution trust tests + +* Multiple closing issues fail closed as ambiguous +* Artifact-label fallback is reported as fallback provenance +* Identical input state produces identical attribution output +* Missing or ambiguous attribution does not grant elevated authority + +### Policy tests + +* Critical security work becomes supervised +* Low-risk documentation work becomes bounded +* Unknown risk becomes propose-only +* Organization policy overrides repository policy +* Required approval disables auto-merge +* Less restrictive rules cannot weaken stronger constraints + +### Enforcement tests + +* Denied tools cannot execute +* Write scope cannot be expanded +* Agent cannot modify its own policy +* Failed required checks prevent completion +* Missing approval prevents merge when required + +### Determinism test + +Given identical: + +* GitHub state +* configuration +* resolver version +* compiler version + +the normalized intent and policy outputs must be identical. + +## Release gates + +### Gate 1: attribution value proven + +Proceed beyond attribution-only reporting only when: + +1. Coverage is measured and visible beside weighted reporting +2. Root deduplication produces materially different and more honest reporting than artifact counts alone +3. Manual review shows that mapped results are directionally useful +4. 
The mapped categories reveal meaningful work patterns rather than label noise + +### Gate 2: attribution trustworthy enough for control + +Proceed to governance only when: + +1. Deterministic resolution is demonstrated +2. Ambiguous and unlinked cases fail closed +3. Attribution provenance is available for official decisions +4. Manual validation shows sufficient precision for authority-changing cases +5. Fallback logic is auditable and not silently permissive + +## Definition of done + +### Attribution release done + +The attribution release is complete when: + +1. A GitHub issue or pull request resolves into a normalized intent record +2. Missing and ambiguous intent are represented explicitly +3. Coverage, ambiguity, and unique-root reporting are available +4. Weighted acceptance is reported only with attribution coverage beside it +5. Manual sampling can be used to assess mapping quality + +### Initial governance release done + +The initial governance release is complete when: + +1. The attribution release is already complete +2. Intent deterministically compiles into at least one minimal execution-policy distinction +3. Ambiguous and unlinked intent receive the safest available policy +4. Runtime behavior is restricted by policy for the initial governed cases +5. Human approval and check requirements are enforced for the initial governed cases +6. Every policy decision records enough provenance to audit why authority was granted or denied +7. Every governed run emits execution telemetry +8. Every resulting artifact receives an outcome and evidence record +9. No LLM decision is required for official authorization + +## Product position + +This system is not a business-impact calculator. + +It is a deterministic control layer for agentic GitHub work. + +> **Intent determines authority. 
Execution produces evidence.** diff --git a/specs/objective-mapping-portfolio-reporting.md b/specs/objective-mapping-portfolio-reporting.md deleted file mode 100644 index 21813d94bd9..00000000000 --- a/specs/objective-mapping-portfolio-reporting.md +++ /dev/null @@ -1,583 +0,0 @@ - ---- -title: Objective Mapping & Portfolio Reporting Specification -version: 1.1.0 -status: Partially Implemented -date: 2026-06-09 -last_updated: 2026-06-10 ---- - -# Objective Mapping & Portfolio Reporting Specification - -This specification defines a reusable label-to-objective-value mapping layer for GitHub work. It also outlines later phases where that mapping can be applied to safe output outcomes, root issue tracing, and portfolio-level impact reporting. - -## Implementation Scope - -### Phase 1: Implemented Now - -The current implementation is no longer just the bare mapping layer: - -1. A shared GitHub utility loads `.github/objective-mapping.json` -2. Labels are mapped to numeric objective values through `ObjectiveMapping` -3. CLI outcome reporting enriches outcomes with `objective_value`, `objective_labels`, and `traced_root_url` -4. Pull request outcomes trace to linked closing issues before objective values are computed -5. Outcome summaries and per-objective breakdowns aggregate attempted and accepted objective value - -This phase gives GitHub work a single configurable impact vocabulary and already supports basic root-aware outcome measurement. - -### Phase 2+: Later Extensions - -The rest of this document describes natural extensions that may be added later: - -1. Root tracing beyond the current PR-to-closing-issue path, including epic resolution -2. Campaign-level aggregation and filtering in dedicated reports -3. Portfolio reporting workflows that consume the existing objective-enriched outcome data -4. Cost-aware efficiency metrics using real AI Credits data -5. 
Strategic analysis over delivered impact - -Those extensions are design targets, not required complexity for trying the mapping itself. - -## Overview: Impact Measurement Through Root Cause Tracing - -Longer term, objective mapping answers: **What impact did we create?** By connecting work back to the root problems it solves. - -For the MVP, the answer is simpler: **what value do these labels represent?** - -For the extended model, that grows into: - -1. **Root Problems** — Issues and epics represent actual business objectives (with PM-assigned impact values) -2. **Tracing** — Safe outputs (PRs, comments) trace backward to root issues/epics -3. **Impact Scoring** — Objectives are assigned to root problems, not intermediate artifacts -4. **Portfolio Reporting** — Aggregates impact by objective to show which business goals are being achieved - -This enables questions like: -- **What high-priority problems did we solve?** (Trace accepted PRs back to root issues, check labels) -- **Which initiatives made the most progress?** (Aggregate impact by epic) -- **What value did we create per objective?** (Sum impact values for accepted outcomes) -- **What's our ROI by problem domain?** (Efficiency = accepted value / attempted value) - -## Extended Architecture: Trace → Root → Map → Aggregate - -This architecture describes the fuller impact model beyond the current MVP mapping layer. - -### Components - -1. **Root Tracing** — Traces safe outputs (PRs, issues) back to root issue or epic -2. **ObjectiveMapping** — Maps root issue/epic labels to numeric impact values -3. **Configuration File** — `.github/objective-mapping.json` with objective values -4. **Outcome Enrichment** — GitHub API queries to fetch root objects and their labels -5. **Portfolio Report** — Aggregates impact by objective, showing what value was created - -### Design Principles - -1. **Root Source of Truth** — Objectives are assigned to root issues/epics, not PRs or comments -2. 
**Traceability** — All work must trace back through GitHub's native linking (PR → issue) -3. **Centralized Configuration** — Single source of truth at `.github/objective-mapping.json` -4. **PM-Assigned Impact** — Labels on root objects represent business priorities -5. **Portfolio Visibility** — Aggregates show what problems were solved, what impact created - -## Campaigns & Objective Alignment - -### How Campaigns Work With Objectives - -A **campaign** is a bounded initiative organized around specific business objectives. Examples: - -- **"Q2 Performance Month"** — Campaign to improve latency (objective label: `initiative-performance`) -- **"Auth System Redesign"** — Major initiative (objective label: `epic-auth`) -- **"Critical Bug Fixes"** — Campaign to resolve urgent issues (objective label: `critical`) -- **"Testing Infrastructure"** — Initiative to improve test coverage (objective label: `testing`) - -### Campaign → Objectives → Root Issues → Impact Measurement - -``` -Campaign: "Q2 Performance Month" - ↓ -Assigned Objectives: initiative-performance (300 points) - ↓ -Root Issues Created: #1234, #5678, #9012 -(all labeled with "initiative-performance") - ↓ -Agent Creates PRs & Reviews (safe outputs) - ↓ -Safe Outputs Accepted/Rejected - ↓ -[Root Tracing] Trace PRs back to root issues - ↓ -[Objective Mapping] Fetch labels from root issues (initiative-performance) - ↓ -[Portfolio Report] Aggregate by campaign objectives: - Total attempted: 1500 points (5 PRs × 300 points each) - Delivered: 1200 points (4 accepted × 300 points) - Efficiency: 80% → Campaign on track, good progress -``` - -### Campaign Reporting - -Portfolio reports can be filtered by campaign to answer: - -| Question | Answer | Example | -|----------|--------|---------| -| **How is this campaign doing?** | Efficiency metric | "Performance Month: 75% delivered (1500/2000 points)" | -| **Which campaigns succeeded?** | High efficiency campaigns | "Auth redesign: 90% complete, critical bugs: 85% 
complete" | -| **Which need intervention?** | Low efficiency campaigns | "Low-priority features: only 40% delivered, investigate blocker" | -| **What's total campaign impact?** | Sum all delivered objectives | "This quarter we delivered 12,000 points across 8 campaigns" | - -### Configuration for Campaigns - -Add campaign-level labels to `.github/objective-mapping.json`: - -```json -{ - "label_to_value": { - "epic-auth": 500, - "epic-performance": 300, - "initiative-modernize": 400, - "campaign-q2-testing": 200, - "critical": 100, - "p0": 100, - "p1": 50 - }, - "multi_label_logic": "max", - "priority_labels": ["epic-auth", "epic-performance", "initiative-modernize", "campaign-q2-testing", "critical"] -} -``` - -**Strategy:** Campaigns typically use `multi_label_logic: "max"` so that a PR addressing both a campaign objective and a critical issue gets the higher value (captures the most important aspect). - -### Connecting Campaigns to Execution - -**Setup:** -1. PM defines campaign with clear objectives (e.g., "Ship auth redesign by EOQ") -2. Assign objective label to campaign (e.g., `epic-auth` with value 500) -3. All root issues in the campaign are labeled with that objective -4. Agent runs workflow against those issues - -**Measurement:** -1. Safe outputs traced back to root issues via PR links -2. Root issues have objective label (`epic-auth`) -3. Portfolio report aggregates by campaign objective -4. Shows: "Campaign delivered 500/600 planned points (83% success)" - -**Strategic Alignment:** -- Campaigns are how business divides work into initiatives -- Objectives are the labels that mark root issues as part of that campaign -- Impact measurement answers: "Did the campaign deliver what was planned?" - -## Configuration - -### File Format - -The configuration file maps objective labels (typically on root issues/epics) to impact values: - -```json -{ - "_comment": "Impact mapping for business objectives. 
Labels are assigned to root issues/epics by PM/team.", - "label_to_value": { - "epic-auth": 500, - "initiative-performance": 300, - "critical": 100, - "p0": 100, - "p1": 50 - }, - "multi_label_logic": "max", - "priority_labels": ["epic-auth", "initiative-performance", "critical", "p0"] -} -``` - -### Location & Precedence - -Objectives are loaded in this order (first found wins): - -1. **Environment Variable** — `OBJECTIVE_MAPPING_JSON` (full JSON string or file path) -2. **Repository File** — `.github/objective-mapping.json` -3. **Built-in Defaults** — Fallback with standard objectives - -### Typical Objective Labels - -These are assigned by PMs/teams to root issues and epics: - -- **Epics** (e.g., `epic-auth`, `initiative-modernize`) — Major initiatives worth 300–500 impact -- **Critical** (e.g., `critical`, `p0`) — Must-fix problems worth 100 impact -- **High-priority** (e.g., `p1`) — Important work worth 50 impact -- **Domains** (e.g., `security`, `performance`) — Strategic focus areas worth 30–80 impact - -### Repo-Specific Weighting Guidance For gh-aw - -This repository produces a large volume of workflow-generated issues that are useful operationally but should not drown out substantive product and platform work in portfolio reports. - -For `gh-aw`, the objective map should therefore: - -1. **Promote bug, security, incident, reliability, and genuine feature work** so these dominate impact ranking. -2. **Demote generated reporting, smoke-testing, automation bookkeeping, and documentation-only issues** to low fallback values. -3. **Treat process labels as secondary** so a bug labeled `automation` still ranks as a bug, but a report labeled only `automation`, `observability`, `report`, or `testing` does not look like top-tier impact. - -This is especially important because many agentic workflows report by opening issues with markdown report bodies. 
Those issues are operational evidence and test/reliability signals, not equivalent to shipping feature work or fixing security and correctness problems. - -### Multi-Label Logic - -When an outcome has multiple objective labels, the system applies one of three strategies: - -| Strategy | Behavior | Use Case | Example | -|----------|----------|----------|---------| -| **max** (default) | Uses highest value | Risk-based prioritization | `[bug, p0]` → 100 | -| **sum** | Adds all values | Cumulative impact | `[performance, workflow]` → 75 | -| **first** | Uses priority order | Organizational hierarchy | `[p0, testing]` → 100 (p0 first) | - -#### Example: Multi-Label Computation - -Given labels `[bug, p0, testing]` with values `{bug: 70, p0: 100, testing: 75}`: - -``` -max: max(70, 100, 75) = 100 -sum: 70 + 100 + 75 = 245 -first: depends on priority_labels order -``` - -## Current Outcome Integration and Remaining Root-Tracing Gaps - -This section describes what is already implemented in the CLI today and what still remains future work. - -### The Problem This Solves - -When a PR is merged (safe output accepted), we need to know: **What business objective did it deliver?** - -- The PR itself may have no labels -- The PR links to one or more issues -- Those issues contain the real business labels -- We must trace PR → issue → get labels → map to impact value - -This is how GitHub always worked: root issue describes the problem, PRs are the solution. - -### Data Flow - -``` -Safe output created (e.g., "create_pull_request") - ↓ -[EvaluateOutcomes] → outcome = "accepted" (merged) or "rejected" (closed) - ↓ -[enrichOutcomeWithObjectiveValue] - 1. For PR outcomes: GitHub API trace via closing issues - 2. For direct issue outcomes, or if PR tracing fails: fetch labels from the issue itself - 3. Use labels from the traced root object, not PR labels - 4. 
Store traced_root_url for audit trail - ↓ -ObjectiveMapping.ComputeObjectiveValue(root_labels) - ↓ -OutcomeReport populated with: - - objective_value: int - - objective_labels: []string - - traced_root_url: string - ↓ [ComputeOutcomeSummary] -OutcomeSummary aggregates: - - total_objective_value (what we attempted) - - accepted_objective_value (what succeeded) - - objective_efficiency (success rate by value) - ↓ [ComputeDomainBreakdowns] -DomainBreakdown per objective: - - attempted: count of work toward this objective - - accepted: count successfully delivered - - total_objective_value: impact points we attempted - - accepted_objective_value: impact points we delivered - - objective_efficiency: % of value we succeeded on -``` - -Current limitation: epic resolution is still a future extension. The implemented trace path is PR → closing issue, with fallback to direct issue labels. - -### Root Resolution Algorithm - -```go -func traceOutcomeRoot(obj GitHubObject, repo string) GitHubObject { - if obj.Type == "PullRequest" { - if len(obj.ClosingIssues) > 0 { - return obj.ClosingIssues[0] - } - } - - return obj -} -``` - -Future extension: - -```go -func traceToRootIssueOrEpic(obj GitHubObject, repo string) GitHubObject { - root := traceOutcomeRoot(obj, repo) - if root.Type == "Issue" && root.EpicLink != nil { - return root.EpicLink - } - return root -} -``` - -### Why Root Tracing Matters - -**Example: PR for "fix auth bug"** - -Without tracing: -- PR has no labels → objective_value = 0 → shows no impact - -With the currently implemented tracing: -- PR links to issue #1234 (labeled `agentic-campaign`, `security`) -- Root issue labels feed the mapping → objective_value is computed from the configured label map -- Outcome summaries and objective breakdowns reflect delivered value on the root issue labels - -This is the **only way** to measure what business value was created, because PRs don't carry the semantic meaning — issues do. 
- -## Future Portfolio Reporting: Measuring What Value Was Created - -### The Question It Answers - -Instead of: **"How many PRs did we merge?"** (25 PRs, so what?) - -This asks: **"How much business value did we deliver?"** (We aimed for 1000 impact points on our critical objectives, delivered 750, 75% success rate) - -### Impact Metrics - -Each objective (label assigned to root issues/epics by PM) shows: - -| Metric | Meaning | Example | -|--------|---------|---------| -| **Attempted** | Work started toward this objective | 20 PRs addressing `epic-auth` | -| **Accepted** | Work successfully delivered | 15 of 20 PRs merged | -| **Total Impact** | Value we tried to deliver | 20 attempts × 100 points = 2000 | -| **Delivered Impact** | Value we actually delivered | 15 successes × 100 points = 1500 | -| **Efficiency** | Percentage of value achieved | 1500 / 2000 = 75% ✅ Good progress | - -### Objective Breakdown Metrics - -Each objective is aggregated with these metrics: - -| Field | Type | Meaning | -|-------|------|---------| -| `label` | string | Business objective (e.g., `epic-auth`, `critical`) | -| `attempted` | int | Total work started on this objective | -| `accepted` | int | Work successfully delivered | -| `rejected` | int | Work that failed or was rejected | -| `pending` | int | Work still in progress | -| `total_objective_value` | int | Impact value attempted (sum of all values) | -| `accepted_objective_value` | int | Impact value delivered (sum of accepted values) | -| `objective_efficiency` | float64 | accepted / total (percentage of planned value realized) | -| `acceptance_rate` | float64 | accepted / attempted (percentage of work that succeeded) | - -### Example: Portfolio Impact Report - -```json -{ - "total": 50, - "accepted": 35, - "objective_efficiency": 0.75, - "domain_breakdowns": [ - { - "label": "epic-auth", - "attempted": 20, - "accepted": 15, - "rejected": 4, - "pending": 1, - "total_objective_value": 10000, - "accepted_objective_value": 
7500, - "objective_efficiency": 0.75, - "acceptance_rate": 0.75 - }, - { - "label": "critical", - "attempted": 15, - "accepted": 14, - "rejected": 1, - "pending": 0, - "total_objective_value": 1500, - "accepted_objective_value": 1400, - "objective_efficiency": 0.93, - "acceptance_rate": 0.93 - }, - { - "label": "p1", - "attempted": 15, - "accepted": 6, - "rejected": 8, - "pending": 1, - "total_objective_value": 750, - "accepted_objective_value": 300, - "objective_efficiency": 0.40, - "acceptance_rate": 0.40 - } - ] -} -``` - -### What This Report Says - -- **Epic-auth**: Aimed for 10,000 impact on this initiative, delivered 7,500 (75% success) → Continue but monitor -- **Critical**: Aimed for 1,500 impact, delivered 1,400 (93% success) → Excellent, keep strategy -- **P1**: Aimed for 750 impact, delivered only 300 (40% success) → Investigate issues, may need human review - -### Performance Analysis: Impact-Based Insights - -The `AnalyzeDomainPerformance()` function interprets efficiency to answer: **How well are we delivering on this objective?** - -| Efficiency | Status | Meaning | -|-----------|--------|---------| -| ≥ 90% | excellent | Delivering nearly all planned value → Keep strategy, scale if possible | -| ≥ 75% | good | Strong progress on objective → Monitor for regressions, maintain discipline | -| ≥ 50% | fair | Moderate success, room to improve → Review process, may need human guidance | -| < 50% | poor | Failing to deliver value → Investigate root cause, pause or redesign automation | - -**Example Interpretations:** - -- **epic-auth at 75%**: Started with 10,000 impact points planned, delivering 7,500. We're solving most auth problems successfully, but some are slipping. Review what's failing. -- **critical at 93%**: Nearly perfect on critical issues. Strategy is working. Could increase volume. -- **p1 at 40%**: Only delivering 40% of planned p1 work. Major problems here — investigate before continuing. 
- -## Business Impact Model - -### Key Insight: Root Tracing is Non-Negotiable - -This system only works if we trace back to root issues/epics. Here's why: - -1. **Root issues carry business semantics** — They're labeled by PMs with strategic intent -2. **PRs are tactical** — They're solutions, not problems; they shouldn't carry business labels -3. **GitHub's native model** — Issues represent work-to-do, PRs represent work-done -4. **Audit trail** — We can show exactly which business problems were solved - -### Example: Bad vs. Good Impact Measurement - -**Without Tracing (Bad):** -``` -Safe output: PR #456 merged -PR labels: none -Conclusion: No impact (objectiveValue = 0) -``` - -**With Tracing (Good):** -``` -Safe output: PR #456 merged -PR linked to: Issue #123 "Fix auth token expiry" -Issue #123 labels: epic-auth, critical -Conclusion: 100 impact points delivered (from critical label) -Portfolio: "We delivered a critical fix to the auth epic" -``` - -## Data: How Objectives Should Be Assigned - -PMs assign objectives by labeling root issues/epics. Examples: - -| Root Object | Labels | Impact | Meaning | -|---|---|---|---| -| Issue #1234 | `epic-auth` | 500 | Work toward major auth initiative | -| Issue #5678 | `critical` | 100 | Must-fix bug blocking users | -| Issue #9012 | `p1` | 50 | Important enhancement | -| Epic "Modernize API" | `initiative-api-v2` | 1000 | Major multi-quarter initiative | - -When a PR closes one of these issues, it inherits the impact value. 
- -## API & Functions - -### ObjectiveMapping - -```go -type ObjectiveMapping struct { - LabelToValue map[string]int `json:"label_to_value"` - MultiLabelLogic string `json:"multi_label_logic"` - PriorityLabels []string `json:"priority_labels"` -} - -// Compute value from labels using configured strategy -func (om *ObjectiveMapping) ComputeObjectiveValue(labels []string) int - -// Get objective labels (mapped labels only) -func (om *ObjectiveMapping) GetObjectiveLabels(labels []string) []string - -// Load from config file, env var, or defaults -func LoadObjectiveMappingFromConfig() *ObjectiveMapping - -// Get built-in defaults -func DefaultObjectiveMapping() *ObjectiveMapping -``` - -### Objective Breakdown - -```go -type DomainBreakdown struct { - Label string `json:"label"` // Objective label (e.g., "epic-auth") - Attempted int `json:"attempted"` // Count of work toward this objective - Accepted int `json:"accepted"` // Count successfully delivered - Rejected int `json:"rejected"` // Count that failed - Pending int `json:"pending"` // Count in progress - TotalObjectiveValue int `json:"total_objective_value"` // Impact attempted - AcceptedObjectiveValue int `json:"accepted_objective_value"` // Impact delivered - ObjectiveEfficiency float64 `json:"objective_efficiency"` // Efficiency % - AcceptanceRate float64 `json:"acceptance_rate"` // Success % -} - -// Aggregate outcomes by objective label -func ComputeDomainBreakdowns(reports []OutcomeReport) []DomainBreakdown -``` - -## Testing - -### Unit Tests - -The `label_objective_mapping_test.go` covers: - -- Max/sum/first combination logics with multiple label scenarios -- Case insensitivity and whitespace trimming -- Nil and empty slice handling -- Combined label computation -- Real-world scenarios (e.g., `[bug, p0]`, `[performance, workflow]`) - -All tests must pass before deployment: - -```bash -go test ./pkg/github -run TestObjectiveMapping -go test ./pkg/cli -run "TestComputeOutcomeSummary|TestEvaluateOutcomes" 
-``` - -### Current Integration Tests - -Current tests verify: - -1. Objective values are computed with the configured combination strategy -2. Pull request outcomes trace to closing issues before label evaluation -3. Direct issue-label fallback works when PR tracing is unavailable -4. Objective summaries and breakdowns aggregate correctly -5. Audit trail (`traced_root_url`) is recorded correctly - -### Future Integration Tests - -Additional end-to-end testing should verify: - -1. Root tracing correctly follows PR → issue → epic links once epic support exists -2. Labels are fetched from final root objects, not from intermediate artifacts -3. Cost-aware efficiency metrics calculate accurately when AI Credits data is available - -## Extended Performance Considerations - -1. **Mapping Loaded Once** — At CLI startup, reused for all outcomes -2. **GitHub API Calls** — One call per outcome to fetch labels (async batch recommended) -3. **Aggregation** — O(n) scan of outcomes to compute domains -4. **Memory** — Domain map is O(unique labels), typically < 100 entries - -## Error Handling - -| Error | Behavior | Recovery | -|-------|----------|----------| -| Missing config file | Use defaults | Application continues | -| Invalid JSON in config | Use defaults, log error | Application continues | -| GitHub API 404 | Skip enrichment, value = 0 | Outcome evaluates normally | -| GitHub API 5xx | Log error, skip enrichment | Retry on next evaluation cycle | -| Invalid label in config | Ignored | Mapping continues with valid labels | - -## Future Extensions - -1. **Batch Root Tracing** — Async batch fetching of root issues to reduce GitHub API calls and avoid hitting rate limits -2. **Impact Trends** — Track efficiency trends over time (e.g., "epic-auth efficiency improved from 60% to 75%") -3. **Multi-Issue Links** — Handle PRs linked to multiple issues with different objectives -4. **Epic Hierarchies** — Support nested epics (epic → parent epic → get labels from both) -5. 
**Workflow-Based Portfolio Reports** — Dedicated reporting workflows built on the existing objective-enriched outcome data -6. **AI Credits Integration** — Factor in real run-cost data for value-per-credit reporting -7. **Predictive Efficiency** — Use historical efficiency to forecast the likely delivery rate - -## References - -- [Safe Output Outcome Evaluation Specification](./safe-output-outcome-evaluation.md) -- [AI Credits Specification](../docs/src/content/docs/specs/ai-credits-specification.md) -- Implementation: `pkg/github/label_objective_mapping.go`, `pkg/cli/outcome_domain_breakdown.go`