Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog/23902.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
```release-note:bug
core: fix bug where deadlock detection was always on for expiration and quotas.
These can now be configured individually with `detect_deadlocks`.
```

39 changes: 31 additions & 8 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"os"
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -690,6 +691,9 @@ type Core struct {

// If any role based quota (LCQ or RLQ) is enabled, don't track lease counts by role
impreciseLeaseRoleTracking bool

// Config value for "detect_deadlocks".
detectDeadlocks []string
}

// c.stateLock needs to be held in read mode before calling this function.
Expand Down Expand Up @@ -947,19 +951,28 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
if conf.NumRollbackWorkers == 0 {
conf.NumRollbackWorkers = RollbackDefaultNumWorkers
}
// Use imported logging deadlock if requested
var stateLock locking.RWMutex
if strings.Contains(conf.DetectDeadlocks, "statelock") {
stateLock = &locking.DeadlockRWMutex{}
} else {
stateLock = &locking.SyncRWMutex{}
}

effectiveSDKVersion := conf.EffectiveSDKVersion
if effectiveSDKVersion == "" {
effectiveSDKVersion = version.GetVersion().Version
}

var detectDeadlocks []string
if conf.DetectDeadlocks != "" {
detectDeadlocks = strings.Split(conf.DetectDeadlocks, ",")
for k, v := range detectDeadlocks {
detectDeadlocks[k] = strings.ToLower(strings.TrimSpace(v))
}
}

// Use imported logging deadlock if requested
var stateLock locking.RWMutex
stateLock = &locking.SyncRWMutex{}

if slices.Contains(detectDeadlocks, "statelock") {
stateLock = &locking.DeadlockRWMutex{}
}

// Setup the core
c := &Core{
entCore: entCore{},
Expand Down Expand Up @@ -1033,6 +1046,7 @@ func CreateCore(conf *CoreConfig) (*Core, error) {
rollbackMountPathMetrics: conf.MetricSink.TelemetryConsts.RollbackMetricsIncludeMountPoint,
numRollbackWorkers: conf.NumRollbackWorkers,
impreciseLeaseRoleTracking: conf.ImpreciseLeaseRoleTracking,
detectDeadlocks: detectDeadlocks,
}

c.standbyStopCh.Store(make(chan struct{}))
Expand Down Expand Up @@ -1219,7 +1233,9 @@ func NewCore(conf *CoreConfig) (*Core, error) {

// Quotas
quotasLogger := conf.Logger.Named("quotas")
c.quotaManager, err = quotas.NewManager(quotasLogger, c.quotaLeaseWalker, c.metricSink)

detectDeadlocks := slices.Contains(c.detectDeadlocks, "quotas")
c.quotaManager, err = quotas.NewManager(quotasLogger, c.quotaLeaseWalker, c.metricSink, detectDeadlocks)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -4188,3 +4204,10 @@ func (c *Core) GetRaftAutopilotState(ctx context.Context) (*raft.AutopilotState,
func (c *Core) Events() *eventbus.EventBus {
return c.events
}

func (c *Core) DetectStateLockDeadlocks() bool {
if _, ok := c.stateLock.(*locking.DeadlockRWMutex); ok {
return true
}
return false
}
48 changes: 48 additions & 0 deletions vault/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3361,3 +3361,51 @@ func InduceDeadlock(t *testing.T, vaultcore *Core, expected uint32) {
t.Fatalf("expected 1 deadlock, detected %d", deadlocks)
}
}

func TestExpiration_DeadlockDetection(t *testing.T) {
testCore := TestCore(t)
testCoreUnsealed(t, testCore)

if testCore.expiration.DetectDeadlocks() {
t.Fatal("expiration has deadlock detection enabled, it shouldn't")
}

testCore = TestCoreWithDeadlockDetection(t, nil, false)
testCoreUnsealed(t, testCore)

if !testCore.expiration.DetectDeadlocks() {
t.Fatal("expiration doesn't have deadlock detection enabled, it should")
}
}

func TestQuotas_DeadlockDetection(t *testing.T) {
testCore := TestCore(t)
testCoreUnsealed(t, testCore)

if testCore.quotaManager.DetectDeadlocks() {
t.Fatal("quotas has deadlock detection enabled, it shouldn't")
}

testCore = TestCoreWithDeadlockDetection(t, nil, false)
testCoreUnsealed(t, testCore)

if !testCore.quotaManager.DetectDeadlocks() {
t.Fatal("quotas doesn't have deadlock detection enabled, it should")
}
}

func TestStatelock_DeadlockDetection(t *testing.T) {
testCore := TestCore(t)
testCoreUnsealed(t, testCore)

if testCore.DetectStateLockDeadlocks() {
t.Fatal("statelock has deadlock detection enabled, it shouldn't")
}

testCore = TestCoreWithDeadlockDetection(t, nil, false)
testCoreUnsealed(t, testCore)

if !testCore.DetectStateLockDeadlocks() {
t.Fatal("statelock doesn't have deadlock detection enabled, it should")
}
}
22 changes: 19 additions & 3 deletions vault/expiration.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"math/rand"
"os"
"path"
"slices"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -114,7 +115,7 @@ type ExpirationManager struct {
pending sync.Map
nonexpiring sync.Map
leaseCount int
pendingLock locking.DeadlockRWMutex
pendingLock locking.RWMutex

// A sync.Lock for every active leaseID
lockPerLease sync.Map
Expand Down Expand Up @@ -327,7 +328,7 @@ func getNumExpirationWorkers(c *Core, l log.Logger) int {

// NewExpirationManager creates a new ExpirationManager that is backed
// using a given view, and uses the provided router for revocation.
func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, logger log.Logger) *ExpirationManager {
func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, logger log.Logger, detectDeadlocks bool) *ExpirationManager {
managerLogger := logger.Named("job-manager")
jobManager := fairshare.NewJobManager("expire", getNumExpirationWorkers(c, logger), managerLogger, c.metricSink)
jobManager.Start()
Expand All @@ -340,6 +341,7 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
tokenStore: c.tokenStore,
logger: logger,
pending: sync.Map{},
pendingLock: &locking.SyncRWMutex{},
nonexpiring: sync.Map{},
leaseCount: 0,
tidyLock: new(int32),
Expand Down Expand Up @@ -375,6 +377,11 @@ func NewExpirationManager(c *Core, view *BarrierView, e ExpireLeaseStrategy, log
exp.logger = log.New(&opts)
}

if detectDeadlocks {
managerLogger.Debug("enabling deadlock detection")
exp.pendingLock = &locking.DeadlockRWMutex{}
}

go exp.uniquePoliciesGc()

return exp
Expand All @@ -390,7 +397,9 @@ func (c *Core) setupExpiration(e ExpireLeaseStrategy) error {

// Create the manager
expLogger := c.baseLogger.Named("expiration")
mgr := NewExpirationManager(c, view, e, expLogger)

detectDeadlocks := slices.Contains(c.detectDeadlocks, "expiration")
mgr := NewExpirationManager(c, view, e, expLogger, detectDeadlocks)
c.expiration = mgr

// Link the token store to this
Expand Down Expand Up @@ -2821,3 +2830,10 @@ func decodeLeaseEntry(buf []byte) (*leaseEntry, error) {
out := new(leaseEntry)
return out, jsonutil.DecodeJSON(buf, out)
}

func (e *ExpirationManager) DetectDeadlocks() bool {
if _, ok := e.pendingLock.(*locking.DeadlockRWMutex); ok {
return true
}
return false
}
28 changes: 21 additions & 7 deletions vault/quotas/quotas.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,13 @@ type Manager struct {
metricSink *metricsutil.ClusterMetricSink

// quotaLock is a lock for manipulating quotas and anything not covered by a more specific lock
quotaLock *locking.DeadlockRWMutex
quotaLock locking.RWMutex

// quotaConfigLock is a lock for accessing config items, such as RateLimitExemptPaths
quotaConfigLock *locking.DeadlockRWMutex
quotaConfigLock locking.RWMutex

// dbAndCacheLock is a lock for db and path caches that need to be reset during Reset()
dbAndCacheLock *locking.DeadlockRWMutex
dbAndCacheLock locking.RWMutex
}

// QuotaLeaseInformation contains all of the information lease-count quotas require
Expand Down Expand Up @@ -275,7 +275,7 @@ type Request struct {

// NewManager creates and initializes a new quota manager to hold all the quota
// rules and to process incoming requests.
func NewManager(logger log.Logger, walkFunc leaseWalkFunc, ms *metricsutil.ClusterMetricSink) (*Manager, error) {
func NewManager(logger log.Logger, walkFunc leaseWalkFunc, ms *metricsutil.ClusterMetricSink, detectDeadlocks bool) (*Manager, error) {
db, err := memdb.NewMemDB(dbSchema())
if err != nil {
return nil, err
Expand All @@ -287,9 +287,16 @@ func NewManager(logger log.Logger, walkFunc leaseWalkFunc, ms *metricsutil.Clust
metricSink: ms,
rateLimitPathManager: pathmanager.New(),
config: new(Config),
quotaLock: new(locking.DeadlockRWMutex),
quotaConfigLock: new(locking.DeadlockRWMutex),
dbAndCacheLock: new(locking.DeadlockRWMutex),
quotaLock: &locking.SyncRWMutex{},
quotaConfigLock: &locking.SyncRWMutex{},
dbAndCacheLock: &locking.SyncRWMutex{},
}

if detectDeadlocks {
logger.Debug("enabling deadlock detection")
manager.quotaLock = &locking.DeadlockRWMutex{}
manager.quotaConfigLock = &locking.DeadlockRWMutex{}
manager.dbAndCacheLock = &locking.DeadlockRWMutex{}
}

manager.init(walkFunc)
Expand Down Expand Up @@ -1319,3 +1326,10 @@ func (m *Manager) HandleBackendDisabling(ctx context.Context, nsPath, mountPath

return nil
}

func (m *Manager) DetectDeadlocks() bool {
if _, ok := m.quotaLock.(*locking.DeadlockRWMutex); ok {
return true
}
return false
}
2 changes: 1 addition & 1 deletion vault/quotas/quotas_rate_limit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ func TestRateLimitQuota_Allow_WithBlock(t *testing.T) {

func TestRateLimitQuota_Update(t *testing.T) {
defer goleak.VerifyNone(t)
qm, err := NewManager(logging.NewVaultLogger(log.Trace), nil, metricsutil.BlackholeSink())
qm, err := NewManager(logging.NewVaultLogger(log.Trace), nil, metricsutil.BlackholeSink(), true)
require.NoError(t, err)

quota := NewRateLimitQuota("quota1", "", "", "", "", false, time.Second, 0, 10)
Expand Down
6 changes: 3 additions & 3 deletions vault/quotas/quotas_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
)

func TestQuotas_MountPathOverwrite(t *testing.T) {
qm, err := NewManager(logging.NewVaultLogger(log.Trace), nil, metricsutil.BlackholeSink())
qm, err := NewManager(logging.NewVaultLogger(log.Trace), nil, metricsutil.BlackholeSink(), true)
require.NoError(t, err)

quota := NewRateLimitQuota("tq", "", "kv1/", "", "", false, time.Second, 0, 10)
Expand All @@ -43,7 +43,7 @@ func TestQuotas_MountPathOverwrite(t *testing.T) {
}

func TestQuotas_Precedence(t *testing.T) {
qm, err := NewManager(logging.NewVaultLogger(log.Trace), nil, metricsutil.BlackholeSink())
qm, err := NewManager(logging.NewVaultLogger(log.Trace), nil, metricsutil.BlackholeSink(), true)
require.NoError(t, err)

setQuotaFunc := func(t *testing.T, name, nsPath, mountPath, pathSuffix, role string, inheritable bool) Quota {
Expand Down Expand Up @@ -142,7 +142,7 @@ func TestQuotas_QueryResolveRole_RateLimitQuotas(t *testing.T) {
leaseWalkFunc := func(context.Context, func(request *Request) bool) error {
return nil
}
qm, err := NewManager(logging.NewVaultLogger(log.Trace), leaseWalkFunc, metricsutil.BlackholeSink())
qm, err := NewManager(logging.NewVaultLogger(log.Trace), leaseWalkFunc, metricsutil.BlackholeSink(), true)
require.NoError(t, err)

rlqReq := &Request{
Expand Down
14 changes: 14 additions & 0 deletions vault/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,20 @@ func TestCoreWithSeal(t testing.T, testSeal Seal, enableRaw bool) *Core {
return TestCoreWithSealAndUI(t, conf)
}

func TestCoreWithDeadlockDetection(t testing.T, testSeal Seal, enableRaw bool) *Core {
conf := &CoreConfig{
Seal: testSeal,
EnableUI: false,
EnableRaw: enableRaw,
BuiltinRegistry: corehelpers.NewMockBuiltinRegistry(),
AuditBackends: map[string]audit.Factory{
"file": auditFile.Factory,
},
DetectDeadlocks: "expiration,quotas,statelock",
}
return TestCoreWithSealAndUI(t, conf)
}

func TestCoreWithCustomResponseHeaderAndUI(t testing.T, CustomResponseHeaders map[string]map[string]string, enableUI bool) (*Core, [][]byte, string) {
confRaw := &server.Config{
SharedConfig: &configutil.SharedConfig{
Expand Down
9 changes: 5 additions & 4 deletions website/content/docs/configuration/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,11 @@ to specify where the configuration is.
maximum request duration allowed before Vault cancels the request. This can
be overridden per listener via the `max_request_duration` value.

- `detect_deadlocks` `(string: "")` - Specifies the internal mutex locks that should be monitored for
potential deadlocks. Currently supported value is `statelock`, which will cause "POTENTIAL DEADLOCK:"
to be logged when an attempt at a core state lock appears to be deadlocked. Enabling this can have
a negative effect on performance due to the tracking of each lock attempt.
- `detect_deadlocks` `(string: "")` - A comma separated string that specifies the internal
mutex locks that should be monitored for potential deadlocks. Currently supported values
include `statelock`, `quotas` and `expiration` which will cause "POTENTIAL DEADLOCK:"
to be logged when an attempt at a core state lock appears to be deadlocked. Enabling this
can have a negative effect on performance due to the tracking of each lock attempt.

- `raw_storage_endpoint` `(bool: false)` – Enables the `sys/raw` endpoint which
allows the decryption/encryption of raw data into and out of the security
Expand Down