Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions enterprise/server/raft/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1189,27 +1189,25 @@ func (rq *Queue) findRebalanceReplicaOp(rd *rfpb.RangeDescriptor, storesWithStat
candidateStores[nhid] = store
}

localNHID := ""
for _, repl := range rd.GetReplicas() {
if repl.GetReplicaId() == localReplicaID {
localNHID = repl.GetNhid()
}
store, ok := candidateStores[repl.GetNhid()]
if !ok {
// The store might not be available right now.
continue
}
delete(candidateStores, repl.GetNhid())
if repl.GetReplicaId() == localReplicaID {
// This prevents us from removing the replica on this node. Removing the
// local replica can only be supported once we are able to transfer
// leadership away.
continue
}
if store.fullDisk {
// We want to move the replica away from the store with full disk.
needRebalance = true
}
otherReplicaStores[repl.GetNhid()] = store
}
// This prevents us from removing the replica on this node. Removing the
// local replica can only be supported once we are able to transfer
// leadership away.
delete(otherReplicaStores, localNHID)

// Remove replicas that are in the middle of removal from candidates.
for _, repl := range rd.GetRemoved() {
Expand All @@ -1219,6 +1217,9 @@ func (rq *Queue) findRebalanceReplicaOp(rd *rfpb.RangeDescriptor, storesWithStat
var targetCandidates []*candidate
for _, store := range candidateStores {
if !store.fullDisk {
store.fullDisk = isDiskFullForRebalance(store.usage)
store.replicaCountMeanLevel = replicaCountMeanLevel(storesWithStats, store.usage)
store.replicaCount = store.usage.ReplicaCount
targetCandidates = append(targetCandidates, store)
}
}
Expand All @@ -1229,9 +1230,6 @@ func (rq *Queue) findRebalanceReplicaOp(rd *rfpb.RangeDescriptor, storesWithStat
return nil
}
bestTarget := slices.MaxFunc(targetCandidates, compareByScoreAndID)
bestTarget.fullDisk = isDiskFullForRebalance(bestTarget.usage)
bestTarget.replicaCountMeanLevel = replicaCountMeanLevel(storesWithStats, bestTarget.usage)
bestTarget.replicaCount = bestTarget.usage.ReplicaCount

potentialOps := make([]*rebalanceOp, 0, len(otherReplicaStores))
for _, nhid := range slices.Sorted(maps.Keys(otherReplicaStores)) {
Expand Down
62 changes: 62 additions & 0 deletions enterprise/server/raft/driver/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,68 @@ func TestRebalanceReplica(t *testing.T) {
to: &candidate{nhid: "nhid-5"},
},
},
{
// Two target candidates: nhid-4 (200 replicas, below mean)
// and nhid-5 (600 replicas, above mean). The best target is
// nhid-4 because it has far fewer replicas. This test
// verifies that candidate scoring fields (replicaCount,
// replicaCountMeanLevel) are populated before picking the
// best candidate, not after. Without that, MaxFunc would
// fall through to the nhid string tiebreaker and
// incorrectly pick nhid-5.
desc: "pick-target-with-fewer-replicas",
rd: &rfpb.RangeDescriptor{
RangeId: 1,
Replicas: []*rfpb.ReplicaDescriptor{
{RangeId: 1, ReplicaId: 1, Nhid: proto.String("nhid-1")}, // local
{RangeId: 1, ReplicaId: 2, Nhid: proto.String("nhid-2")},
{RangeId: 1, ReplicaId: 3, Nhid: proto.String("nhid-3")},
},
},
replicasByStatus: &storemap.ReplicasByStatus{
LiveReplicas: []*rfpb.ReplicaDescriptor{
{RangeId: 1, ReplicaId: 1, Nhid: proto.String("nhid-1")}, // local
{RangeId: 1, ReplicaId: 2, Nhid: proto.String("nhid-2")},
{RangeId: 1, ReplicaId: 3, Nhid: proto.String("nhid-3")},
},
},
usages: []*rfpb.StoreUsage{
{
Node: &rfpb.NodeDescriptor{Nhid: "nhid-1"},
ReplicaCount: 500,
TotalBytesUsed: 100,
TotalBytesFree: 900,
},
{
Node: &rfpb.NodeDescriptor{Nhid: "nhid-2"},
ReplicaCount: 600,
TotalBytesUsed: 100,
TotalBytesFree: 900,
},
{
Node: &rfpb.NodeDescriptor{Nhid: "nhid-3"},
ReplicaCount: 500,
TotalBytesUsed: 100,
TotalBytesFree: 900,
},
{
Node: &rfpb.NodeDescriptor{Nhid: "nhid-4"},
ReplicaCount: 200,
TotalBytesUsed: 100,
TotalBytesFree: 900,
},
{
Node: &rfpb.NodeDescriptor{Nhid: "nhid-5"},
ReplicaCount: 600,
TotalBytesUsed: 100,
TotalBytesFree: 900,
},
},
expected: &rebalanceOp{
from: &candidate{nhid: "nhid-2"},
to: &candidate{nhid: "nhid-4"},
},
},
{
desc: "no-reblance-when-around-mean",
rd: &rfpb.RangeDescriptor{
Expand Down
Loading