From b87c0c3d316ba166838aa3109640bd413028cdcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Tue, 27 Jan 2026 17:26:07 +0100 Subject: [PATCH 1/7] Fix robin debug manifests --- .../examples/ephemeral-robin-debug/kustomization.yml | 2 +- .../redis_v1_redkeycluster-ephemeral.yaml | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/config/examples/ephemeral-robin-debug/kustomization.yml b/config/examples/ephemeral-robin-debug/kustomization.yml index e606f11..fee4200 100644 --- a/config/examples/ephemeral-robin-debug/kustomization.yml +++ b/config/examples/ephemeral-robin-debug/kustomization.yml @@ -11,6 +11,6 @@ patches: name: redis-cluster-ephemeral resources: -- ../ephemeral + - redis_v1_redkeycluster-ephemeral.yaml namespace: redkey-operator \ No newline at end of file diff --git a/config/examples/ephemeral-robin-debug/redis_v1_redkeycluster-ephemeral.yaml b/config/examples/ephemeral-robin-debug/redis_v1_redkeycluster-ephemeral.yaml index 2d8ddd7..e4ff76d 100644 --- a/config/examples/ephemeral-robin-debug/redis_v1_redkeycluster-ephemeral.yaml +++ b/config/examples/ephemeral-robin-debug/redis_v1_redkeycluster-ephemeral.yaml @@ -99,12 +99,12 @@ spec: # port: 9090 # targetPort: 9090 # protocol: TCP - # statefulSet: - # metadata: - # annotations: - # traffic.inditex.dev/weight: "10" - # labels: - # inditex.dev/test: "test" + # statefulSet: + # metadata: + # annotations: + # traffic.inditex.dev/weight: "10" + # labels: + # inditex.dev/test: "test" # spec: # template: # metadata: From 77417d037054a54afcc7ad7b55eba804beefdd39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Tue, 27 Jan 2026 17:26:52 +0100 Subject: [PATCH 2/7] Fix logging message --- controllers/redis_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/redis_manager.go b/controllers/redis_manager.go index 13ca72d..06e4fee 100644 --- a/controllers/redis_manager.go +++ 
b/controllers/redis_manager.go @@ -654,7 +654,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingDown(ctx context.Context, return err } if !check { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster readiness before ending the fast upgrade", "errors", errors, "warnings", warnings) + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster readiness before ending the slow upgrade", "errors", errors, "warnings", warnings) return nil // Cluster not ready --> keep waiting } From a44ceb220ce4cf9737841967f74fd8117385b974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Tue, 27 Jan 2026 17:52:30 +0100 Subject: [PATCH 3/7] Reduce log verbosity when moving slots --- controllers/redis_manager.go | 6 ++++-- internal/robin/robin.go | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/controllers/redis_manager.go b/controllers/redis_manager.go index 06e4fee..b61a7ae 100644 --- a/controllers/redis_manager.go +++ b/controllers/redis_manager.go @@ -395,9 +395,10 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeUpgrading(ctx context.Context, re return err } if !completed { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting to complete moving slots", "From node", currentPartition, "To node", currentPartition+1) + r.logInfo(redkeyCluster.NamespacedName(), "Moving slots still in progress", "From node", currentPartition, "To node", currentPartition+1) return nil // Move slots not completed --> keep waiting } + r.logInfo(redkeyCluster.NamespacedName(), "Moving slots completed", "From node", currentPartition, "To node", currentPartition+1) } // Stop Robin reconciliation @@ -568,9 +569,10 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCl return err } if !completed { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting to complete moving slots", "From node", extraNodeIndex, "To node", 0) + r.logInfo(redkeyCluster.NamespacedName(), "Moving slots still in progress", "From 
node", extraNodeIndex, "To node", 0) return nil // Move slots not completed --> keep waiting } + r.logInfo(redkeyCluster.NamespacedName(), "Moving slots completed", "From node", extraNodeIndex, "To node", 0) // ScaleDown the cluster r.logInfo(redkeyCluster.NamespacedName(), "Scaling down the cluster to remove the extra node") diff --git a/internal/robin/robin.go b/internal/robin/robin.go index afa5004..887240d 100644 --- a/internal/robin/robin.go +++ b/internal/robin/robin.go @@ -347,10 +347,8 @@ func (r *Robin) MoveSlots(nodeIndexFrom int, nodeIndexTo int, numSlots int) (boo } if status.Status == "Completed" { - r.Logger.Info("Moving slots completed", "from", nodeIndexFrom, "to", nodeIndexTo, "slots", numSlots) return true, nil } else { - r.Logger.Info("Moving slots still in progress", "from", nodeIndexFrom, "to", nodeIndexTo, "slots", numSlots) return false, nil } } From c83a4fb26234dc38158085918c6f30b8cb62a210 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Tue, 27 Jan 2026 20:12:00 +0100 Subject: [PATCH 4/7] Refactoring to suit the Robin regression fix. 
Substatuses renaming --- api/v1/redkeycluster_types.go | 4 +- controllers/redis_manager.go | 83 ++++++++++++++++------------------- docs/redkey-cluster-status.md | 31 +++++-------- 3 files changed, 53 insertions(+), 65 deletions(-) diff --git a/api/v1/redkeycluster_types.go b/api/v1/redkeycluster_types.go index 27d5381..3890bd1 100644 --- a/api/v1/redkeycluster_types.go +++ b/api/v1/redkeycluster_types.go @@ -71,11 +71,11 @@ const ( SubstatusFastUpgrading = "FastUpgrading" SubstatusEndingFastUpgrading = "EndingFastUpgrading" - SubstatusSlowUpgrading = "SlowUpgrading" + SubstatusEmptyingNode = "EmptyingNode" SubstatusUpgradingScalingUp = "ScalingUp" SubstatusUpgradingScalingDown = "ScalingDown" SubstatusEndingSlowUpgrading = "EndingSlowUpgrading" - SubstatusRollingConfig = "RollingConfig" + SubstatusRollingUpdate = "RollingUpdate" SubstatusFastScaling = "FastScaling" SubstatusEndingFastScaling = "EndingFastScaling" diff --git a/controllers/redis_manager.go b/controllers/redis_manager.go index b61a7ae..6aa85c1 100644 --- a/controllers/redis_manager.go +++ b/controllers/redis_manager.go @@ -231,13 +231,13 @@ func (r *RedkeyClusterReconciler) doSlowUpgrade(ctx context.Context, redkeyClust switch redkeyCluster.Status.Substatus.Status { case redkeyv1.SubstatusUpgradingScalingUp: err = r.doSlowUpgradeScalingUp(ctx, redkeyCluster, existingStatefulSet) - case redkeyv1.SubstatusSlowUpgrading: - err = r.doSlowUpgradeUpgrading(ctx, redkeyCluster, existingStatefulSet) + case redkeyv1.SubstatusEmptyingNode: + err = r.doSlowUpgradeEmptyNode(ctx, redkeyCluster, existingStatefulSet) case redkeyv1.SubstatusEndingSlowUpgrading: err = r.doSlowUpgradeEnd(ctx, redkeyCluster, existingStatefulSet) case redkeyv1.SubstatusUpgradingScalingDown: err = r.doSlowUpgradeScalingDown(ctx, redkeyCluster, existingStatefulSet) - case redkeyv1.SubstatusRollingConfig: + case redkeyv1.SubstatusRollingUpdate: err = r.doSlowUpgradeRollingUpdate(ctx, redkeyCluster, existingStatefulSet) default: err = 
r.doSlowUpgradeStart(ctx, redkeyCluster, existingStatefulSet) @@ -308,7 +308,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re } // Update substatus. - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusSlowUpgrading, "") + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, "") if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -317,7 +317,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re return nil } -func (r *RedkeyClusterReconciler) doSlowUpgradeUpgrading(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { +func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { // Check Redis node pods rediness. nodePodsReady, err := r.allPodsReady(ctx, redkeyCluster, existingStatefulSet) @@ -365,7 +365,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeUpgrading(ctx context.Context, re var currentPartition int if redkeyCluster.Status.Substatus.UpgradingPartition == "" { currentPartition = int(*(existingStatefulSet.Spec.Replicas)) - 1 - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusSlowUpgrading, strconv.Itoa(currentPartition)) + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, strconv.Itoa(currentPartition)) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -421,7 +421,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeUpgrading(ctx context.Context, re return err } - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusRollingConfig, strconv.Itoa(currentPartition)) + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusRollingUpdate, strconv.Itoa(currentPartition)) if err != nil { 
r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -493,14 +493,14 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeRollingUpdate(ctx context.Context // If first partition reached, we can move to the next step. // Else step over to the next partition. if currentPartition == 0 { - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEndingSlowUpgrading, strconv.Itoa(currentPartition)) + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEndingSlowUpgrading, "") if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err } } else { nextPartition := currentPartition - 1 - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusSlowUpgrading, strconv.Itoa(nextPartition)) + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, strconv.Itoa(nextPartition)) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -574,23 +574,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCl } r.logInfo(redkeyCluster.NamespacedName(), "Moving slots completed", "From node", extraNodeIndex, "To node", 0) - // ScaleDown the cluster - r.logInfo(redkeyCluster.NamespacedName(), "Scaling down the cluster to remove the extra node") - *existingStatefulSet.Spec.Replicas = *existingStatefulSet.Spec.Replicas - 1 - _, err = r.updateStatefulSet(ctx, existingStatefulSet, redkeyCluster) - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Failed to update StatefulSet replicas") - return err - } - - // Reset node - err = redkeyRobin.ClusterResetNode(extraNodeIndex) - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error from Robin forgeting the node", "node index", extraNodeIndex) - return err - } - - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusUpgradingScalingDown, "0") + err = 
r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusUpgradingScalingDown, "") if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -601,26 +585,26 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCl func (r *RedkeyClusterReconciler) doSlowUpgradeScalingDown(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { - // Check Redis node pods rediness. - nodePodsReady, err := r.allPodsReady(ctx, redkeyCluster, existingStatefulSet) + logger := r.getHelperLogger(redkeyCluster.NamespacedName()) + redkeyRobin, err := robin.NewRobin(ctx, r.Client, redkeyCluster, logger) if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Could not check for Redis node pods being ready") + r.logError(redkeyCluster.NamespacedName(), err, "Error getting Robin") return err } - if !nodePodsReady { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Redis node pods to become ready") - return nil // Not all pods ready -> keep waiting - } - r.logInfo(redkeyCluster.NamespacedName(), "Redis node pods are ready", "pods", existingStatefulSet.Spec.Replicas) - logger := r.getHelperLogger(redkeyCluster.NamespacedName()) - redkeyRobin, err := robin.NewRobin(ctx, r.Client, redkeyCluster, logger) + // Check cluster status to know if Robin has already scaled down the cluster. 
+ clusterStatus, err := redkeyRobin.GetClusterStatus() if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error getting Robin") + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") return err } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return nil // Cluster not ready --> keep waiting + } - // Set the number of primaries/replicasPerPrimary to Robin to have the new node met to the existing nodes. + // Set the number of primaries/replicasPerPrimary to Robin to start scaling down the cluster. + r.logInfo(redkeyCluster.NamespacedName(), "Scaling down the cluster to remove the extra node") primaries, replicasPerPrimary, err := redkeyRobin.GetReplicas() if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error getting primaries/replicasPerPrimary from Robin") @@ -638,15 +622,26 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingDown(ctx context.Context, } } - // Check cluster status to know if Robin has already scaled down the cluster. - clusterStatus, err := redkeyRobin.GetClusterStatus() + // ScaleDown the StatefulSet + if *existingStatefulSet.Spec.Replicas > redkeyCluster.Spec.Primaries { + r.logInfo(redkeyCluster.NamespacedName(), "Updating StatefulSet to remove the extra pod") + *existingStatefulSet.Spec.Replicas = *existingStatefulSet.Spec.Replicas - 1 + _, err = r.updateStatefulSet(ctx, existingStatefulSet, redkeyCluster) + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Failed to update StatefulSet replicas") + return err + } + } + + // Check all cluster nodes are ready from Robin. 
+ clusterNodes, err := redkeyRobin.GetClusterNodes() if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster nodes from Robin") return err } - if clusterStatus != redkeyv1.RobinStatusReady { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) - return nil // Cluster not ready --> keep waiting + if len(clusterNodes.Nodes) != int(*existingStatefulSet.Spec.Replicas) { + r.logInfo(redkeyCluster.NamespacedName(), "Cluster not yet scaled from Robin") + return nil // Not all nodes ready --> Keep waiting } // Check all cluster nodes are ready from Robin. diff --git a/docs/redkey-cluster-status.md b/docs/redkey-cluster-status.md index ce350a6..f094065 100644 --- a/docs/redkey-cluster-status.md +++ b/docs/redkey-cluster-status.md @@ -167,12 +167,12 @@ redis-cluster-ephemeral 3 0 true true redis:8 These SubStatus have been defined: * **ScalingUp**: The Operator add one pod that is added as a Primary node to the Redkey Cluster. This node will be used to move slots from the existing nodes before recreating them. -* **SlowUpgrading**: A node is being empty, moving all its slots (and keys) to another node. -* **RollingConfig**: The empty node is being recreated, recreating its pod and asking to Robin to refresh the cluster. +* **EmptyingNode**: A node is being empty, moving all its slots (and keys) to another node. +* **RollingUpdate**: The empty node is being recreated, recreating its pod and asking to Robin to refresh the cluster. * **EndingSlowUpgrading**: Move the slots (and keys) from the extra node. * **ScalingDown**: The extra node added to the cluster is removed. -When Slow upgrading a Redkey Cluster the upgrade is executed from partition to partition, aaplying the **SlowUpgrading** and **RollingConfig** SubStatus to each partition. 
+When Slow upgrading a Redkey Cluster the upgrade is executed from partition to partition, aaplying the **EmptyingNode** and **RollingUpdate** SubStatus to each partition. Current partition can be shown using `kubectl get rkcl -o wide`. @@ -186,22 +186,15 @@ redis-cluster-ephemeral 3 0 true false redis:8 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Ready redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading ScalingUp -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 3 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingConfig 3 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 2 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 2 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 2 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingConfig 2 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingConfig 2 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 1 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 1 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingConfig 1 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingConfig 1 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading SlowUpgrading 0 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingConfig 0 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EndingSlowUpgrading 0 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 
3 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 3 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 2 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 2 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 1 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 1 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 0 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 0 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EndingSlowUpgrading 0 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading ScalingDown 0 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Ready From 7d19f0c8701552c7134dc164daccdc29b99ad64b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Wed, 28 Jan 2026 16:09:14 +0100 Subject: [PATCH 5/7] Removing unnecessary checks when slow upgrading --- controllers/redis_manager.go | 77 +++--------------------------------- 1 file changed, 6 insertions(+), 71 deletions(-) diff --git a/controllers/redis_manager.go b/controllers/redis_manager.go index 6aa85c1..293fff2 100644 --- a/controllers/redis_manager.go +++ b/controllers/redis_manager.go @@ -255,7 +255,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re return err } if !nodePodsReady { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Redis node pods to become ready") + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Redis node pods to become ready", "primariesRequired", existingStatefulSet.Spec.Replicas, "primariesReady", existingStatefulSet.Status.ReadyReplicas) return nil // Not all pods ready -> keep waiting } r.logInfo(redkeyCluster.NamespacedName(), "Redis node pods are ready", "pods", existingStatefulSet.Spec.Replicas) @@ -274,6 +274,7 @@ func (r *RedkeyClusterReconciler) 
doSlowUpgradeScalingUp(ctx context.Context, re return err } if primaries != int(redkeyCluster.Spec.Primaries+1) || replicasPerPrimary != int(redkeyCluster.Spec.ReplicasPerPrimary) { + r.logInfo(redkeyCluster.NamespacedName(), "Updating Robin primaries/replicasPerPrimary", "primaries", redkeyCluster.Spec.Primaries+1, "replicasPerPrimary", redkeyCluster.Spec.ReplicasPerPrimary) err = robin.PersistRobinReplicas(ctx, r.Client, redkeyCluster, int(redkeyCluster.Spec.Primaries)+1, int(redkeyCluster.Spec.ReplicasPerPrimary)) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error persisting Robin primaries/replicasPerPrimary") @@ -296,17 +297,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re return nil // Cluster not ready --> keep waiting } - // Check all cluster nodes are ready from Robin. - clusterNodes, err := redkeyRobin.GetClusterNodes() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster nodes from Robin") - return err - } - if len(clusterNodes.Nodes) != int(*existingStatefulSet.Spec.Replicas) { - r.logInfo(redkeyCluster.NamespacedName(), "Not all cluster nodes are yet ready from Robin") - return nil // Not all nodes ready --> Keep waiting - } - // Update substatus. err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, "") if err != nil { @@ -319,18 +309,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { - // Check Redis node pods rediness. 
- nodePodsReady, err := r.allPodsReady(ctx, redkeyCluster, existingStatefulSet) - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Could not check for Redis node pods being ready") - return err - } - if !nodePodsReady { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Redis node pods to become ready") - return nil // Not all pods ready -> keep waiting - } - r.logInfo(redkeyCluster.NamespacedName(), "Redis node pods are ready", "pods", existingStatefulSet.Spec.Replicas) - // Get Robin. logger := r.getHelperLogger(redkeyCluster.NamespacedName()) redkeyRobin, err := robin.NewRobin(ctx, r.Client, redkeyCluster, logger) @@ -350,17 +328,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, re return nil // Cluster not ready --> keep waiting } - // Check all cluster nodes are ready from Robin. - clusterNodes, err := redkeyRobin.GetClusterNodes() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster nodes from Robin") - return err - } - if len(clusterNodes.Nodes) != int(*existingStatefulSet.Spec.Replicas) { - r.logInfo(redkeyCluster.NamespacedName(), "Not all cluster nodes are yet ready from Robin") - return nil // Not all nodes ready --> Keep waiting - } - // Get the current partition and update Upgrading Partition in RedkeyCluster Status if starting iterating over partitions. var currentPartition int if redkeyCluster.Status.Substatus.UpgradingPartition == "" { @@ -378,10 +345,11 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, re } } - // If first iteration over partitions: update configuration. + // If first iteration over partitions: update configuration. This partition is empty: no need to move slots. // Else: Move slots away from partition and rolling update (don't do over the extra node to optimize). 
if currentPartition == int(*(existingStatefulSet.Spec.Replicas))-1 { // Update configuration: changes in configuration, labels and overrides are persisted before upgrading + r.logInfo(redkeyCluster.NamespacedName(), "Last partition: updating configuration before rolling config") existingStatefulSet, err = r.upgradeClusterConfigurationUpdate(ctx, redkeyCluster) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating Cluster configuration") @@ -389,6 +357,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, re } } else { // Move slots from partition before rolling update. + r.logInfo(redkeyCluster.NamespacedName(), "Moving slots from partition before rolling config", "partition", currentPartition) completed, err := redkeyRobin.MoveSlots(currentPartition, currentPartition+1, 0) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error moving slots", "From node", currentPartition, "To node", currentPartition+1) @@ -479,17 +448,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeRollingUpdate(ctx context.Context return nil // Cluster not ready --> keep waiting } - // Check all cluster nodes are ready from Robin. - clusterNodes, err := redkeyRobin.GetClusterNodes() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster nodes from Robin") - return err - } - if len(clusterNodes.Nodes) != int(*existingStatefulSet.Spec.Replicas) { - r.logInfo(redkeyCluster.NamespacedName(), "Not all cluster nodes are yet ready from Robin") - return nil // Not all nodes ready --> Keep waiting - } - // If first partition reached, we can move to the next step. // Else step over to the next partition. 
if currentPartition == 0 { @@ -519,18 +477,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeRollingUpdate(ctx context.Context func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { - // Check Redis node pods rediness (pod from last rolling update could be not ready yet). - nodePodsReady, err := r.allPodsReady(ctx, redkeyCluster, existingStatefulSet) - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Could not check for Redis node pods being ready") - return err - } - if !nodePodsReady { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Redis node pods to become ready") - return nil // Not all pods ready -> keep waiting - } - r.logInfo(redkeyCluster.NamespacedName(), "Redis node pods are ready", "pods", existingStatefulSet.Spec.Replicas) - // Get Robin. logger := r.getHelperLogger(redkeyCluster.NamespacedName()) redkeyRobin, err := robin.NewRobin(ctx, r.Client, redkeyCluster, logger) @@ -550,17 +496,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCl return nil // Cluster not ready --> keep waiting } - // Check all cluster nodes are ready from Robin. - clusterNodes, err := redkeyRobin.GetClusterNodes() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster nodes from Robin") - return err - } - if len(clusterNodes.Nodes) != int(*existingStatefulSet.Spec.Replicas) { - r.logInfo(redkeyCluster.NamespacedName(), "Not all cluster nodes are yet ready from Robin") - return nil // Not all nodes ready --> Keep waiting - } - // Move slots from extra node to node 0. 
extraNodeIndex := int(*(existingStatefulSet.Spec.Replicas)) - 1 completed, err := redkeyRobin.MoveSlots(extraNodeIndex, 0, 0) @@ -644,7 +579,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingDown(ctx context.Context, return nil // Not all nodes ready --> Keep waiting } - // Check all cluster nodes are ready from Robin. + // Check cluster status from Robin. check, errors, warnings, err := redkeyRobin.ClusterCheck() if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error checking the cluster readiness over Robin") From 84af8b982f20a4f1f0e2489b0e3c16b0090ae890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Thu, 29 Jan 2026 11:51:39 +0100 Subject: [PATCH 6/7] Renaming substatus --- api/v1/redkeycluster_types.go | 2 +- controllers/redis_manager.go | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/api/v1/redkeycluster_types.go b/api/v1/redkeycluster_types.go index 3890bd1..89d42fd 100644 --- a/api/v1/redkeycluster_types.go +++ b/api/v1/redkeycluster_types.go @@ -71,7 +71,7 @@ const ( SubstatusFastUpgrading = "FastUpgrading" SubstatusEndingFastUpgrading = "EndingFastUpgrading" - SubstatusEmptyingNode = "EmptyingNode" + SubstatusResharding = "Resharding" SubstatusUpgradingScalingUp = "ScalingUp" SubstatusUpgradingScalingDown = "ScalingDown" SubstatusEndingSlowUpgrading = "EndingSlowUpgrading" diff --git a/controllers/redis_manager.go b/controllers/redis_manager.go index 293fff2..8eddf94 100644 --- a/controllers/redis_manager.go +++ b/controllers/redis_manager.go @@ -231,8 +231,8 @@ func (r *RedkeyClusterReconciler) doSlowUpgrade(ctx context.Context, redkeyClust switch redkeyCluster.Status.Substatus.Status { case redkeyv1.SubstatusUpgradingScalingUp: err = r.doSlowUpgradeScalingUp(ctx, redkeyCluster, existingStatefulSet) - case redkeyv1.SubstatusEmptyingNode: - err = r.doSlowUpgradeEmptyNode(ctx, redkeyCluster, existingStatefulSet) + case redkeyv1.SubstatusResharding: + err = 
r.doSlowUpgradeResharding(ctx, redkeyCluster, existingStatefulSet) case redkeyv1.SubstatusEndingSlowUpgrading: err = r.doSlowUpgradeEnd(ctx, redkeyCluster, existingStatefulSet) case redkeyv1.SubstatusUpgradingScalingDown: @@ -298,7 +298,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re } // Update substatus. - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, "") + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusResharding, "") if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -307,7 +307,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re return nil } -func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { +func (r *RedkeyClusterReconciler) doSlowUpgradeResharding(ctx context.Context, redkeyCluster *redkeyv1.RedkeyCluster, existingStatefulSet *v1.StatefulSet) error { // Get Robin. 
logger := r.getHelperLogger(redkeyCluster.NamespacedName()) @@ -332,7 +332,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEmptyNode(ctx context.Context, re var currentPartition int if redkeyCluster.Status.Substatus.UpgradingPartition == "" { currentPartition = int(*(existingStatefulSet.Spec.Replicas)) - 1 - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, strconv.Itoa(currentPartition)) + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusResharding, strconv.Itoa(currentPartition)) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err @@ -458,7 +458,7 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeRollingUpdate(ctx context.Context } } else { nextPartition := currentPartition - 1 - err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusEmptyingNode, strconv.Itoa(nextPartition)) + err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusResharding, strconv.Itoa(nextPartition)) if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error updating substatus") return err From 755e56ecd89a9430d5ffd1c78deb044560e5d824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alberto=20Mart=C3=ADnez=20P=C3=A9rez?= Date: Thu, 29 Jan 2026 12:54:56 +0100 Subject: [PATCH 7/7] Update docs to suit substatus renaming --- docs/redkey-cluster-status.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/redkey-cluster-status.md b/docs/redkey-cluster-status.md index f094065..d39c8da 100644 --- a/docs/redkey-cluster-status.md +++ b/docs/redkey-cluster-status.md @@ -167,12 +167,12 @@ redis-cluster-ephemeral 3 0 true true redis:8 These SubStatus have been defined: * **ScalingUp**: The Operator add one pod that is added as a Primary node to the Redkey Cluster. This node will be used to move slots from the existing nodes before recreating them. 
-* **EmptyingNode**: A node is being empty, moving all its slots (and keys) to another node. -* **RollingUpdate**: The empty node is being recreated, recreating its pod and asking to Robin to refresh the cluster. +* **Resharding**: A node is being resharded, moving all its slots (and keys) to another node. +* **RollingUpdate**: The resharded node is being recreated, recreating its pod and asking Robin to refresh the cluster. * **EndingSlowUpgrading**: Move the slots (and keys) from the extra node. * **ScalingDown**: The extra node added to the cluster is removed. -When Slow upgrading a Redkey Cluster the upgrade is executed from partition to partition, aaplying the **EmptyingNode** and **RollingUpdate** SubStatus to each partition. +When Slow upgrading a Redkey Cluster the upgrade is executed from partition to partition, applying the **Resharding** and **RollingUpdate** SubStatus to each partition. Current partition can be shown using `kubectl get rkcl -o wide`. @@ -186,14 +186,14 @@ redis-cluster-ephemeral 3 0 true false redis:8 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Ready redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading ScalingUp -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 3 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading Resharding +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading Resharding 3 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 3 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 2 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading Resharding 2 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 2 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 1 
+redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading Resharding 1 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 1 -redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EmptyingNode 0 +redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading Resharding 0 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading RollingUpdate 0 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading EndingSlowUpgrading 0 redis-cluster-ephemeral 3 0 true false redis:8-bookworm Upgrading ScalingDown 0