diff --git a/docs/adr/40258-cache-repository-owner-type-per-compilation-run.md b/docs/adr/40258-cache-repository-owner-type-per-compilation-run.md new file mode 100644 index 00000000000..b00af2778d3 --- /dev/null +++ b/docs/adr/40258-cache-repository-owner-type-per-compilation-run.md @@ -0,0 +1,39 @@ +# ADR-40258: Cache Repository Owner-Type Lookup Per Compilation Run + +**Date**: 2026-06-19 +**Status**: Draft + +## Context + +When determining whether to suppress the `copilot-requests: write` permission tip for individual-user repositories, the compiler calls `gh api /users/` ("Checking repository owner type...") to classify the owner as a `User` or `Organization`. This classification depends only on the owner login, not on the individual workflow file. A single `gh aw compile` run compiles every workflow in the repository on one shared `Compiler` instance, so a repository with N Copilot workflows previously issued N identical `/users/` round-trips — wasted latency and API quota for a value that never changes within a run. An owner-type cache (`ownerTypeCache map[string]string`, keyed by owner login) was introduced to deduplicate these lookups, and this PR finalizes that strategy by making the cache's initialization eager and unconditional. + +## Decision + +We will cache the repository owner-type lookup once per owner login on the `Compiler` instance, so that `repositoryOwnerIsIndividualUser` performs at most one `gh api /users/` call per owner for the lifetime of a compilation run. We will initialize `ownerTypeCache` eagerly in `NewCompiler()` — alongside the existing `actionPinWarnings` and `priorManifests` caches — rather than lazily on first use. This makes the always-initialized invariant explicit at construction time and lets `repositoryOwnerIsIndividualUser` drop its defensive nil-guard, keeping the lookup path linear and free of incidental initialization branches. + +## Alternatives Considered + +### Alternative 1: Lazy initialization with a nil-guard at the call site +Keep `ownerTypeCache` zero-valued (nil) and initialize it on first access inside `repositoryOwnerIsIndividualUser`. This was the prior behavior. Rejected because it spreads the cache's lifecycle across two locations, forces every reader to re-assert the invariant, and is inconsistent with the sibling caches that are already initialized in `NewCompiler()`. + +### Alternative 2: No caching — look up owner type on every call +Issue `gh api /users/` each time the permission tip is evaluated. Rejected because it reintroduces N redundant identical API calls per multi-workflow compile run, adding network latency and consuming API rate limit for a value that is constant within the run. + +## Consequences + +### Positive +- At most one owner-type API call per owner per compilation run, eliminating redundant round-trips for multi-workflow repositories. +- The always-initialized cache invariant is established in one place (`NewCompiler()`), consistent with the other compiler caches. +- The lookup path in `repositoryOwnerIsIndividualUser` is simpler without the nil-guard branch. + +### Negative +- The cache lives for the full lifetime of the `Compiler` instance; a long-lived or reused compiler would not observe an owner's type changing between `User` and `Organization` mid-process (an acceptable trade-off given owner type is effectively stable). +- A negative result (empty string from a prior API error) is also cached, so a transient `gh api` failure suppresses retries for that owner for the rest of the run. + +### Neutral +- The cache is keyed by owner login only, so two repositories under the same owner share one entry. +- Correctness of the eager-init refactor is covered by `compiler_owner_type_cache_test.go`, which exercises cache-hit, malformed-slug, cross-compilation reuse, and non-nil-map invariants. + +--- + +*This is a DRAFT ADR generated by the [Design Decision Gate](https://github.com/github/gh-aw/actions/runs/27828047748) workflow. The PR author must review, complete, and finalize this document before the PR can merge.* diff --git a/pkg/workflow/compiler_owner_type_cache_test.go b/pkg/workflow/compiler_owner_type_cache_test.go new file mode 100644 index 00000000000..433341988f6 --- /dev/null +++ b/pkg/workflow/compiler_owner_type_cache_test.go @@ -0,0 +1,116 @@ +//go:build !integration + +package workflow + +import ( + "testing" +) + +// TestRepositoryOwnerIsIndividualUser_CacheHit verifies that repositoryOwnerIsIndividualUser +// returns the cached owner type without making a network call when the owner is already +// in the cache. This ensures the owner-type check is performed at most once per repo +// during a single compilation run. +func TestRepositoryOwnerIsIndividualUser_CacheHit(t *testing.T) { + tests := []struct { + name string + cachedType string + expectedResult bool + }{ + { + name: "cached User type returns true", + cachedType: "User", + expectedResult: true, + }, + { + name: "cached Organization type returns false", + cachedType: "Organization", + expectedResult: false, + }, + { + name: "cached empty string (API error) returns false", + cachedType: "", + expectedResult: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := NewCompiler() + c.SetRepositorySlug("myowner/myrepo") + + // Pre-populate the cache so no network call is made. + // If the cache is not consulted, RunGH would be called without a real gh binary + // available in unit tests, causing the function to return false even for the + // "User" case — the test would then fail on that case. + c.ownerTypeCache["myowner"] = tt.cachedType + + got := c.repositoryOwnerIsIndividualUser() + if got != tt.expectedResult { + t.Errorf("repositoryOwnerIsIndividualUser() = %v, want %v (cached owner type %q)", + got, tt.expectedResult, tt.cachedType) + } + }) + } +} + +// TestRepositoryOwnerIsIndividualUser_MalformedSlug verifies that the function +// returns false immediately when repositorySlug is missing or malformed, without +// consulting the cache or making a network call. +func TestRepositoryOwnerIsIndividualUser_MalformedSlug(t *testing.T) { + tests := []struct { + name string + slug string + }{ + {name: "empty slug", slug: ""}, + {name: "no slash", slug: "owneronly"}, + {name: "trailing slash only", slug: "/"}, + {name: "missing owner", slug: "/repo"}, + {name: "missing repo", slug: "owner/"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := NewCompiler() + c.SetRepositorySlug(tt.slug) + + got := c.repositoryOwnerIsIndividualUser() + if got { + t.Errorf("repositoryOwnerIsIndividualUser() = true for malformed slug %q, want false", tt.slug) + } + }) + } +} + +// TestRepositoryOwnerIsIndividualUser_CacheSharedAcrossCompilations verifies that +// the owner-type cache persists across multiple calls on the same Compiler instance, +// which represents multiple workflow files compiled in a single `gh aw compile` run. +func TestRepositoryOwnerIsIndividualUser_CacheSharedAcrossCompilations(t *testing.T) { + c := NewCompiler() + c.SetRepositorySlug("myorg/repo-a") + + // Seed the cache as if a previous call already resolved the owner type. + c.ownerTypeCache["myorg"] = "Organization" + + // Simulate compiling a second workflow in the same repo (different repo name, same owner). + c.SetRepositorySlugIfUnlocked("myorg/repo-b") + + // The cache entry for "myorg" must be reused — no network call is made. + got := c.repositoryOwnerIsIndividualUser() + if got { + t.Error("repositoryOwnerIsIndividualUser() = true for Organization owner, want false") + } + + // The cache should still hold the original value (not been cleared or overwritten). + if val := c.ownerTypeCache["myorg"]; val != "Organization" { + t.Errorf("ownerTypeCache[myorg] = %q after second call, want %q", val, "Organization") + } +} + +// TestRepositoryOwnerIsIndividualUser_CacheInitializedByNewCompiler verifies that +// NewCompiler initializes ownerTypeCache so callers never encounter a nil map panic. +func TestRepositoryOwnerIsIndividualUser_CacheInitializedByNewCompiler(t *testing.T) { + c := NewCompiler() + if c.ownerTypeCache == nil { + t.Fatal("NewCompiler() left ownerTypeCache nil; expected an initialized map") + } +} diff --git a/pkg/workflow/compiler_types.go b/pkg/workflow/compiler_types.go index f0a00d3a34a..28b83fad1d3 100644 --- a/pkg/workflow/compiler_types.go +++ b/pkg/workflow/compiler_types.go @@ -107,7 +107,7 @@ type Compiler struct { requireDocker bool // If true, fail validation when Docker is not available instead of silently skipping ghesCompatFromCLI bool // If true, GHES compat was requested via --ghes CLI flag (takes precedence over aw.json) ghesArtifactCompat bool // If true, emit GHES-compatible v3.x pins for artifact actions instead of the latest v7/v8 - ownerTypeCache map[string]string // Cached GitHub owner type ("User"/"Organization"/"") keyed by owner login + ownerTypeCache map[string]string // Cached GitHub owner type ("User"/"Organization"/"") keyed by owner login; not goroutine-safe (Compiler is used sequentially) } // NewCompiler creates a new workflow compiler with functional options. @@ -136,7 +136,8 @@ func NewCompiler(opts ...CompilerOption) *Compiler { artifactManager: NewArtifactManager(), actionPinWarnings: make(map[string]bool), // Initialize warning cache priorManifests: make(map[string]*GHAWManifest), - gitRoot: gitRoot, // Auto-detected git root + ownerTypeCache: make(map[string]string), // Initialize owner-type cache (keyed by owner login) + gitRoot: gitRoot, // Auto-detected git root } // Apply functional options diff --git a/pkg/workflow/permissions_compiler_validator.go b/pkg/workflow/permissions_compiler_validator.go index 85f355df541..32e9bdf76f1 100644 --- a/pkg/workflow/permissions_compiler_validator.go +++ b/pkg/workflow/permissions_compiler_validator.go @@ -186,15 +186,15 @@ func (c *Compiler) repositoryOwnerIsIndividualUser() bool { return false } - if c.ownerTypeCache == nil { - c.ownerTypeCache = make(map[string]string) - } ownerType, cached := c.ownerTypeCache[owner] if !cached { workflowLog.Printf("Checking owner type for: %s", owner) output, err := RunGH("Checking repository owner type...", "api", "/users/"+owner, "--jq", ".type") if err != nil { workflowLog.Printf("Could not determine owner type for %q: %v", owner, err) + // Cache the empty string so subsequent calls for the same owner also return false + // without retrying. This is intentional: fail-safe means "show the tip when uncertain" + // and avoids N retry round-trips per run. c.ownerTypeCache[owner] = "" return false } diff --git a/pkg/workflow/safe_jobs.go b/pkg/workflow/safe_jobs.go index 7a2c89d1928..7bf5709accc 100644 --- a/pkg/workflow/safe_jobs.go +++ b/pkg/workflow/safe_jobs.go @@ -325,7 +325,7 @@ func extractSafeJobsFromFrontmatter(frontmatter map[string]any) map[string]*Safe if safeOutputsMap, ok := safeOutputs.(map[string]any); ok { if jobs, exists := safeOutputsMap["jobs"]; exists { if jobsMap, ok := jobs.(map[string]any); ok { - c := &Compiler{} // Create a temporary compiler instance for parsing + c := NewCompiler() // Create a temporary compiler instance for parsing return c.parseSafeJobsConfig(jobsMap) } } diff --git a/pkg/workflow/safe_outputs_config.go b/pkg/workflow/safe_outputs_config.go index ee1166bd750..a64e2170a3f 100644 --- a/pkg/workflow/safe_outputs_config.go +++ b/pkg/workflow/safe_outputs_config.go @@ -722,7 +722,7 @@ func (c *Compiler) extractSafeOutputsConfig(frontmatter map[string]any) *SafeOut // Handle jobs (safe-jobs must be under safe-outputs) if jobs, exists := outputMap["jobs"]; exists { if jobsMap, ok := jobs.(map[string]any); ok { - c := &Compiler{} // Create a temporary compiler instance for parsing + c := NewCompiler() // Create a temporary compiler instance for parsing config.Jobs = c.parseSafeJobsConfig(jobsMap) } }