From 721d2a40fefd85cc03c843dcdad7141bf8c48bf9 Mon Sep 17 00:00:00 2001 From: Yossi Segev <40713576+yossisegev@users.noreply.github.com> Date: Wed, 19 Mar 2025 18:34:15 +0200 Subject: [PATCH 1/2] fix: Avoid race when removing interfaces via NNCP [4.17] Removing an interface that was created using an NNCP is done by editing the same NNCP. This sometimes resulted in a race, in which the NNCP success status actually presented the previous status, leading to deleting the NNCP before the configuration was completed, leaving hanging interfaces in the cluster nodes, with node native interfaces occupied as the ports of these tests-created interfaces. A recent PR made this failing flow always occur. This PR aims to ensure that the timestamp of the AVAILABLE status is updated for the recent change (the interface removal) and not the previous change (setup or modification). --- .../node_network_configuration_policy.py | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/ocp_resources/node_network_configuration_policy.py b/ocp_resources/node_network_configuration_policy.py index 298fc08256..b7f771b8d1 100644 --- a/ocp_resources/node_network_configuration_policy.py +++ b/ocp_resources/node_network_configuration_policy.py @@ -1,15 +1,16 @@ import re +from datetime import datetime from kubernetes.dynamic.exceptions import ConflictError -from ocp_resources.constants import TIMEOUT_4MINUTES +from ocp_resources.constants import TIMEOUT_1MINUTE, TIMEOUT_4MINUTES, TIMEOUT_5SEC from ocp_resources.node import Node from ocp_resources.node_network_configuration_enactment import ( NodeNetworkConfigurationEnactment, ) from ocp_resources.node_network_state import NodeNetworkState from ocp_resources.resource import Resource, ResourceEditor -from timeout_sampler import TimeoutExpiredError, TimeoutSampler, TimeoutWatch +from timeout_sampler import TimeoutExpiredError, TimeoutSampler, TimeoutWatch, retry IPV4_STR = "ipv4" IPV6_STR = "ipv6" @@ -325,10 +326,43 @@ 
def _absent_interface(self): if self.ports: self.add_ports() + # The current time-stamp of the NNCP's available status will change after the NNCP is updated, therefore + # it must be fetched and stored before the update, and compared with the new time-stamp after. + initial_success_status_time = self._get_last_successful_transition_time() ResourceEditor( patches={self: {"spec": {"desiredState": {"interfaces": self.desired_state["interfaces"]}}}} ).update() + # If the NNCP failed on setup, then its tear-down AVAILABLE status will necessarily be the first. + if initial_success_status_time: + self._wait_for_nncp_status_update(initial_transition_time=initial_success_status_time) + + def _get_last_successful_transition_time(self) -> str | None: + for condition in self.instance.status.conditions: + if ( + condition["type"] == self.Conditions.Type.AVAILABLE + and condition["status"] == Resource.Condition.Status.TRUE + and condition["reason"] == self.Conditions.Reason.SUCCESSFULLY_CONFIGURED + ): + return condition["lastTransitionTime"] + return None + + @retry( + wait_timeout=TIMEOUT_1MINUTE, + sleep=TIMEOUT_5SEC, + ) + def _wait_for_nncp_status_update(self, initial_transition_time: str) -> bool: + date_format = "%Y-%m-%dT%H:%M:%SZ" + formatted_initial_transition_time = datetime.strptime(initial_transition_time, date_format) + for condition in self.instance.get("status", {}).get("conditions", []): + if ( + condition["type"] == self.Conditions.Type.AVAILABLE + and condition["status"] == Resource.Condition.Status.TRUE + and datetime.strptime(condition["lastTransitionTime"], date_format) > formatted_initial_transition_time + ): + return True + return False + @property def status(self): for condition in self.instance.status.conditions: From e7436da574bbd57d4c6d39f2a6c7f709997a2bd5 Mon Sep 17 00:00:00 2001 From: Yossi Segev Date: Sun, 4 Jan 2026 14:08:45 +0200 Subject: [PATCH 2/2] Satisfy pre-commit and flake requirements --- .flake8 | 1 + pyproject.toml | 2 +- 2 files 
changed, 2 insertions(+), 1 deletion(-) diff --git a/.flake8 b/.flake8 index b18869d0ac..293153ac57 100644 --- a/.flake8 +++ b/.flake8 @@ -55,6 +55,7 @@ fcn_exclude_functions = Path, writelines, submit, + datetime, enable-extensions = FCN, diff --git a/pyproject.toml b/pyproject.toml index a17c3b4310..ae86d8dc1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning"] build-backend = "poetry_dynamic_versioning.backend" [tool.poetry.dependencies] -python = "^3.9" +python = ">=3.9, <3.14" xmltodict = "^0.13.0" colorlog = "^6.7.0" kubernetes = "30.*.*"