diff --git a/controllers/redis_manager.go b/controllers/redis_manager.go index 2c7eaf1..13ca72d 100644 --- a/controllers/redis_manager.go +++ b/controllers/redis_manager.go @@ -285,6 +285,17 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re } } + // Check cluster status to know if Robin has already met the new node. + clusterStatus, err := redkeyRobin.GetClusterStatus() + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + return err + } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return nil // Cluster not ready --> keep waiting + } + // Check all cluster nodes are ready from Robin. clusterNodes, err := redkeyRobin.GetClusterNodes() if err != nil { @@ -296,17 +307,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingUp(ctx context.Context, re return nil // Not all nodes ready --> Keep waiting } - // Check cluster health. - check, errors, warnings, err := redkeyRobin.ClusterCheck() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error checking the cluster readiness over Robin") - return err - } - if !check { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster readiness", "errors", errors, "warnings", warnings) - return nil // Cluster not ready --> keep waiting - } - // Update substatus. err = r.updateClusterSubStatus(ctx, redkeyCluster, redkeyv1.SubstatusSlowUpgrading, "") if err != nil { @@ -339,6 +339,17 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeUpgrading(ctx context.Context, re return err } + // Get cluster status to know if the cluster is ready. 
+ clusterStatus, err := redkeyRobin.GetClusterStatus() + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + return err + } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return nil // Cluster not ready --> keep waiting + } + // Check all cluster nodes are ready from Robin. clusterNodes, err := redkeyRobin.GetClusterNodes() if err != nil { @@ -350,17 +361,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeUpgrading(ctx context.Context, re return nil // Not all nodes ready --> Keep waiting } - // Check cluster health. - check, errors, warnings, err := redkeyRobin.ClusterCheck() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error checking the cluster readiness over Robin") - return err - } - if !check { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster readiness", "errors", errors, "warnings", warnings) - return nil // Cluster not ready --> keep waiting - } - // Get the current partition and update Upgrading Partition in RedkeyCluster Status if starting iterating over partitions. var currentPartition int if redkeyCluster.Status.Substatus.UpgradingPartition == "" { @@ -467,6 +467,17 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeRollingUpdate(ctx context.Context return err } + // Get cluster status to know if Robin has already reset the node. + clusterStatus, err := redkeyRobin.GetClusterStatus() + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + return err + } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return nil // Cluster not ready --> keep waiting + } + // Check all cluster nodes are ready from Robin. 
clusterNodes, err := redkeyRobin.GetClusterNodes() if err != nil { @@ -478,17 +489,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeRollingUpdate(ctx context.Context return nil // Not all nodes ready --> Keep waiting } - // Check cluster health. - check, errors, warnings, err := redkeyRobin.ClusterCheck() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error checking the cluster readiness over Robin") - return err - } - if !check { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster readiness", "errors", errors, "warnings", warnings) - return nil // Cluster not ready --> keep waiting - } - // If first partition reached, we can move to the next step. // Else step over to the next partition. if currentPartition == 0 { @@ -538,6 +538,17 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCl return err } + // Get cluster status to know if Robin is ready after the last rolling update. + clusterStatus, err := redkeyRobin.GetClusterStatus() + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + return err + } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return nil // Cluster not ready --> keep waiting + } + // Check all cluster nodes are ready from Robin. clusterNodes, err := redkeyRobin.GetClusterNodes() if err != nil { @@ -549,17 +560,6 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeEnd(ctx context.Context, redkeyCl return nil // Not all nodes ready --> Keep waiting } - // Check cluster health. 
- check, errors, warnings, err := redkeyRobin.ClusterCheck() - if err != nil { - r.logError(redkeyCluster.NamespacedName(), err, "Error checking the cluster readiness over Robin") - return err - } - if !check { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster readiness", "errors", errors, "warnings", warnings) - return nil // Cluster not ready --> keep waiting - } - // Move slots from extra node to node 0. extraNodeIndex := int(*(existingStatefulSet.Spec.Replicas)) - 1 completed, err := redkeyRobin.MoveSlots(extraNodeIndex, 0, 0) @@ -636,6 +636,17 @@ func (r *RedkeyClusterReconciler) doSlowUpgradeScalingDown(ctx context.Context, } } + // Check cluster status to know if Robin has already scaled down the cluster. + clusterStatus, err := redkeyRobin.GetClusterStatus() + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + return err + } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return nil // Cluster not ready --> keep waiting + } + // Check all cluster nodes are ready from Robin. 
check, errors, warnings, err := redkeyRobin.ClusterCheck() if err != nil { @@ -827,6 +838,15 @@ func (r *RedkeyClusterReconciler) doFastScaling(ctx context.Context, redkeyClust r.logError(redkeyCluster.NamespacedName(), err, "Error getting Robin to check its readiness") return true, err } + clusterStatus, err := robin.GetClusterStatus() + if err != nil { + r.logError(redkeyCluster.NamespacedName(), err, "Error getting cluster status from Robin") + return true, err + } + if clusterStatus != redkeyv1.RobinStatusReady { + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for cluster to be Ready in Robin", "currentStatus", clusterStatus) + return true, nil + } check, errors, warnings, err := robin.ClusterCheck() if err != nil { r.logError(redkeyCluster.NamespacedName(), err, "Error checking the cluster readiness over Robin") @@ -1238,7 +1258,7 @@ func (r *RedkeyClusterReconciler) completeClusterScaleUp(ctx context.Context, re return true, err } if status != redkeyv1.RobinStatusReady { - r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Robin to end scaling up...") + r.logInfo(redkeyCluster.NamespacedName(), "Waiting for Robin to end scaling up...", "robin cluster status", status) return true, nil // Cluster scaling not completed -> requeue } r.logInfo(redkeyCluster.NamespacedName(), "Robin reports cluster is ready after scaling up")