From 430f6c3772043275fac2afa3b4f4913ba6ae44c8 Mon Sep 17 00:00:00 2001 From: aman Date: Thu, 14 May 2026 18:44:21 +0530 Subject: [PATCH] fix(bootstrap): backfill org policies for legacy service users on startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds bootstrap.MigrateServiceUserOrgPolicies, which on each server start finds service users that belong to an org (org_id set; the legacy flow gave them only an org#member@serviceuser SpiceDB relation) but have no matching Postgres policy row, and creates the missing policy. Why: older versions of the membership-creation flow wrote the SpiceDB relation directly without a corresponding policy. Once listing and authorization paths read from Postgres policies (the direction the codebase is moving), those legacy service users silently drop out of "orgs I belong to" results. The backfill makes Postgres policies the authoritative source without losing access for existing data. Mechanics: - internal/store/postgres/serviceuser_repository.go — new ListMissingOrgPolicy set-difference query. Returns []bootstrap.ServiceUserCandidate (id + org_id) for SU rows that have org_id set but no matching policies row. On a clean cluster the result set is empty and startup overhead is one query. - internal/bootstrap/service.go — new MigrateServiceUserOrgPolicies method. Goes through policy.Service.Create so the three SpiceDB rolebinding tuples (rolebinding#bearer@su, rolebinding#role@role, org#granted@rolebinding) are written alongside the Postgres row, preserving Postgres↔SpiceDB consistency. Per-row failures are joined via errors.Join and logged with logger.WarnContext; the loop does not abort on a single bad row. - cmd/serve.go — non-fatal wiring after MakeSuperUsers. Partial failures log a warning and continue, matching the shape of MetaSchemaService.List above and SessionService.InitSessions just below. The function stays in the codebase permanently so downstream deployments can pick it up on their own timeline. 
DI additions to bootstrap.Service: logger, policyService (Create+List), serviceuserRepo (ListMissingOrgPolicy). Constructor signature updated in cmd/serve.go to match. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/serve.go | 12 +++ internal/bootstrap/service.go | 75 +++++++++++++++++++ .../store/postgres/serviceuser_repository.go | 52 +++++++++++++ 3 files changed, 139 insertions(+) diff --git a/cmd/serve.go b/cmd/serve.go index 61dd1a2c8..d220801de 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -217,6 +217,15 @@ func StartServer(logger *slog.Logger, cfg *config.Frontier) error { return err } + // Backfill legacy SU org policies. Non-fatal: this runs over unbounded + // customer data and a single stuck row shouldn't brick the server. + if err := deps.BootstrapService.MigrateServiceUserOrgPolicies(ctx); err != nil { + logger.Warn("serviceuser org policy backfill had partial failures, continuing", + "err", err) + } else { + logger.Info("backfilled serviceuser org policies") + } + // session service initialization and cleanup if err := deps.SessionService.InitSessions(ctx); err != nil { logger.Warn("sessions initialization failed", "err", err) @@ -548,6 +557,7 @@ func buildAPIDependencies( resourceSchemaRepository := blob.NewSchemaConfigRepository(resourceBlobRepository.Bucket) bootstrapService := bootstrap.NewBootstrapService( + logger, cfg.App.Admin, resourceSchemaRepository, namespaceService, @@ -556,6 +566,8 @@ func buildAPIDependencies( userService, authzSchemaRepository, relationService, + policyService, + svUserRepo, cfg.App.PAT.DeniedPermissionsSet(), planService, planBlobRepository, diff --git a/internal/bootstrap/service.go b/internal/bootstrap/service.go index 7374a1af0..0dcbb6477 100644 --- a/internal/bootstrap/service.go +++ b/internal/bootstrap/service.go @@ -2,6 +2,7 @@ package bootstrap import ( "context" + "errors" "fmt" "log/slog" "strings" @@ -12,6 +13,7 @@ import ( "github.com/raystack/frontier/core/namespace" 
"github.com/raystack/frontier/core/permission" + "github.com/raystack/frontier/core/policy" "github.com/raystack/frontier/core/relation" "github.com/raystack/frontier/core/role" "github.com/raystack/frontier/internal/bootstrap/schema" @@ -61,6 +63,25 @@ type PlanService interface { UpsertPlans(ctx context.Context, planFile plan.File) error } +// PolicyService is policy.Service narrowed to what backfill needs. Goes through +// Create so the SpiceDB rolebinding tuples land alongside the row. +type PolicyService interface { + Create(ctx context.Context, pol policy.Policy) (policy.Policy, error) + List(ctx context.Context, flt policy.Filter) ([]policy.Policy, error) +} + +// ServiceUserCandidate is a service user missing its owning-org policy row. +type ServiceUserCandidate struct { + ServiceUserID string + OrgID string +} + +// ServiceUserBackfiller exposes the set-difference query. Narrow on purpose — +// bootstrap shouldn't be able to mutate service users. +type ServiceUserBackfiller interface { + ListMissingOrgPolicy(ctx context.Context) ([]ServiceUserCandidate, error) +} + // AdminConfig is platform administration configuration type AdminConfig struct { // Users are a list of email-ids/uuids which needs to be promoted as superusers @@ -69,6 +90,7 @@ type AdminConfig struct { } type Service struct { + logger *slog.Logger adminConfig AdminConfig schemaConfig FileService namespaceService NamespaceService @@ -77,6 +99,8 @@ type Service struct { authzEngine AuthzEngine userService UserService relationService RelationService + policyService PolicyService + serviceuserRepo ServiceUserBackfiller patDeniedPerms map[string]struct{} planService PlanService @@ -84,6 +108,7 @@ type Service struct { } func NewBootstrapService( + logger *slog.Logger, config AdminConfig, schemaConfig FileService, namespaceService NamespaceService, @@ -92,11 +117,14 @@ func NewBootstrapService( userService UserService, authzEngine AuthzEngine, relationService RelationService, + policyService 
PolicyService, + serviceuserRepo ServiceUserBackfiller, patDeniedPerms map[string]struct{}, planService PlanService, planLocalRepo BillingPlanRepository, ) *Service { return &Service{ + logger: logger, adminConfig: config, schemaConfig: schemaConfig, namespaceService: namespaceService, @@ -107,6 +135,8 @@ func NewBootstrapService( planService: planService, planLocalRepo: planLocalRepo, relationService: relationService, + policyService: policyService, + serviceuserRepo: serviceuserRepo, patDeniedPerms: patDeniedPerms, } } @@ -264,6 +294,51 @@ func (s Service) createRole(ctx context.Context, orgID string, defRole schema.Ro return nil } +// MigrateServiceUserOrgPolicies backfills the org policy for service users that +// have only a SpiceDB member relation (legacy creation flow). Idempotent: on a +// clean cluster the candidate query returns zero rows and this is a no-op. +// Per-row failures are joined into the return value and also logged; the call +// site decides whether to abort or warn-and-continue. 
+func (s Service) MigrateServiceUserOrgPolicies(ctx context.Context) error { + candidates, err := s.serviceuserRepo.ListMissingOrgPolicy(ctx) + if err != nil { + return fmt.Errorf("list candidates: %w", err) + } + if len(candidates) == 0 { + return nil + } + + viewerRole, err := s.roleService.Get(ctx, schema.RoleOrganizationViewer) + if err != nil { + return fmt.Errorf("get viewer role: %w", err) + } + + var errs error + for _, c := range candidates { + _, createErr := s.policyService.Create(ctx, policy.Policy{ + RoleID: viewerRole.ID, + ResourceID: c.OrgID, + ResourceType: schema.OrganizationNamespace, + PrincipalID: c.ServiceUserID, + PrincipalType: schema.ServiceUserPrincipal, + }) + if createErr != nil { + errs = errors.Join(errs, fmt.Errorf("backfill SU %s on org %s: %w", c.ServiceUserID, c.OrgID, createErr)) + s.logger.WarnContext(ctx, "backfill failed for service user, continuing", + "serviceuser_id", c.ServiceUserID, + "org_id", c.OrgID, + "error", createErr, + ) + continue + } + s.logger.InfoContext(ctx, "backfilled SU org policy", + "serviceuser_id", c.ServiceUserID, + "org_id", c.OrgID, + ) + } + return errs +} + // migratePATRelations ensures app/pat:* wildcard tuples are in sync with the current // denied_permissions config for all existing roles. 
Runs on every bootstrap: // - Creates app/pat:* for allowed permissions (idempotent via SpiceDB Touch) diff --git a/internal/store/postgres/serviceuser_repository.go b/internal/store/postgres/serviceuser_repository.go index cecce2efe..11b501091 100644 --- a/internal/store/postgres/serviceuser_repository.go +++ b/internal/store/postgres/serviceuser_repository.go @@ -8,6 +8,8 @@ import ( "fmt" "strings" + "github.com/raystack/frontier/internal/bootstrap" + "github.com/raystack/frontier/internal/bootstrap/schema" "github.com/raystack/frontier/pkg/auditrecord" "github.com/doug-martin/goqu/v9" @@ -223,6 +225,56 @@ func (s ServiceUserRepository) GetByIDs(ctx context.Context, ids []string) ([]se return transformedUsers, nil } +// ListMissingOrgPolicy returns service users that have no Postgres policy row +// on their owning org (set-difference). Drives the backfill in +// bootstrap.MigrateServiceUserOrgPolicies; returns zero rows on a clean cluster. +func (s ServiceUserRepository) ListMissingOrgPolicy(ctx context.Context) ([]bootstrap.ServiceUserCandidate, error) { + policiesSubquery := dialect.From(goqu.T(TABLE_POLICIES).As("p")). + Select(goqu.L("1")). + Where( + goqu.I("p.principal_id").Eq(goqu.I("su.id")), + goqu.I("p.principal_type").Eq(schema.ServiceUserPrincipal), + goqu.I("p.resource_id").Eq(goqu.I("su.org_id")), + goqu.I("p.resource_type").Eq(schema.OrganizationNamespace), + ) + + query, params, err := dialect.From(goqu.T(TABLE_SERVICEUSER).As("su")). + Select( + goqu.I("su.id").As("serviceuser_id"), + goqu.I("su.org_id").As("org_id"), + ). 
+ Where( + goqu.I("su.org_id").IsNotNull(), + goqu.L("NOT EXISTS ?", policiesSubquery), + ).ToSQL() + if err != nil { + return nil, fmt.Errorf("%w: %w", queryErr, err) + } + + type row struct { + ServiceUserID string `db:"serviceuser_id"` + OrgID string `db:"org_id"` + } + var rows []row + if err = s.dbc.WithTimeout(ctx, TABLE_SERVICEUSER, "ListMissingOrgPolicy", func(ctx context.Context) error { + return s.dbc.SelectContext(ctx, &rows, query, params...) + }); err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, nil + } + return nil, fmt.Errorf("%w: %w", dbErr, err) + } + + candidates := make([]bootstrap.ServiceUserCandidate, 0, len(rows)) + for _, r := range rows { + candidates = append(candidates, bootstrap.ServiceUserCandidate{ + ServiceUserID: r.ServiceUserID, + OrgID: r.OrgID, + }) + } + return candidates, nil +} + func (s ServiceUserRepository) Delete(ctx context.Context, id string) error { var result serviceUserWithContext