diff --git a/Makefile.kube_git.var b/Makefile.kube_git.var index 2c2111869c..b881305d2b 100644 --- a/Makefile.kube_git.var +++ b/Makefile.kube_git.var @@ -1,5 +1,5 @@ KUBE_GIT_MAJOR=1 KUBE_GIT_MINOR=35 -KUBE_GIT_VERSION=v1.35.4 -KUBE_GIT_COMMIT=6b9ddbe6e09aa225e949baecde4145208bf94c8e +KUBE_GIT_VERSION=v1.35.5 +KUBE_GIT_COMMIT=c5ea727d19837f8a22cda5c0ba8899960b7777ff KUBE_GIT_TREE_STATE=clean diff --git a/Makefile.version.aarch64.var b/Makefile.version.aarch64.var index 59910102c1..861b472c45 100644 --- a/Makefile.version.aarch64.var +++ b/Makefile.version.aarch64.var @@ -1 +1 @@ -OCP_VERSION := 4.22.0-0.nightly-arm64-2026-05-18-015115 +OCP_VERSION := 4.22.0-0.nightly-arm64-2026-05-18-195115 diff --git a/Makefile.version.x86_64.var b/Makefile.version.x86_64.var index 2bcde195c6..ffb17e6ba5 100644 --- a/Makefile.version.x86_64.var +++ b/Makefile.version.x86_64.var @@ -1 +1 @@ -OCP_VERSION := 4.22.0-0.nightly-2026-05-13-154759 +OCP_VERSION := 4.22.0-0.nightly-2026-05-18-130120 diff --git a/assets/components/multus/kustomization.x86_64.yaml b/assets/components/multus/kustomization.x86_64.yaml index a120349348..0f6ec7e882 100644 --- a/assets/components/multus/kustomization.x86_64.yaml +++ b/assets/components/multus/kustomization.x86_64.yaml @@ -2,7 +2,7 @@ images: - name: multus-cni-microshift newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:05e3dd857008fb90e2e4ccdeb26dfafd39ddcca397283c6f8563bc13192da4cd + digest: sha256:748798f55aed433eef0e451283359084fa1e74adf11cd4d89ee1403774f8fdd3 - name: containernetworking-plugins-microshift newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:e68c577994350df7d0ff57649f24daaf0e28c8e5b7afadfd5357fe90e8387e39 + digest: sha256:3a8acf3de0289a32423b00c259f436684cf06f83e7d933a31857129f507122de diff --git a/assets/components/multus/release-multus-aarch64.json b/assets/components/multus/release-multus-aarch64.json index daa022b111..25cda445ba 100644 --- 
a/assets/components/multus/release-multus-aarch64.json +++ b/assets/components/multus/release-multus-aarch64.json @@ -1,6 +1,6 @@ { "release": { - "base": "4.22.0-0.nightly-arm64-2026-05-18-015115" + "base": "4.22.0-0.nightly-arm64-2026-05-18-195115" }, "images": { "multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:655a9d04510059e5a465b9c38875eccd27dd26cfb9ef8bcecd3c4966fe071798", diff --git a/assets/components/multus/release-multus-x86_64.json b/assets/components/multus/release-multus-x86_64.json index 7bc93adc8c..fd7b621d5b 100644 --- a/assets/components/multus/release-multus-x86_64.json +++ b/assets/components/multus/release-multus-x86_64.json @@ -1,9 +1,9 @@ { "release": { - "base": "4.22.0-0.nightly-2026-05-13-154759" + "base": "4.22.0-0.nightly-2026-05-18-130120" }, "images": { - "multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:05e3dd857008fb90e2e4ccdeb26dfafd39ddcca397283c6f8563bc13192da4cd", - "containernetworking-plugins-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:e68c577994350df7d0ff57649f24daaf0e28c8e5b7afadfd5357fe90e8387e39" + "multus-cni-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:748798f55aed433eef0e451283359084fa1e74adf11cd4d89ee1403774f8fdd3", + "containernetworking-plugins-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:3a8acf3de0289a32423b00c259f436684cf06f83e7d933a31857129f507122de" } } diff --git a/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml b/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml index d5c0cd1a87..caa1ae4722 100644 --- a/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml +++ b/assets/optional/operator-lifecycle-manager/kustomization.x86_64.yaml @@ -2,13 +2,13 @@ images: - name: quay.io/operator-framework/olm newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:8f7ffbbc2881ab733a7380b809a7da79885d4ae4786b2d2e72a9184801caddf3 + 
digest: sha256:524b90fabbab585fab59f637700ab445ee08640aee67d714ae1fb2783af2cb3d - name: quay.io/operator-framework/configmap-operator-registry newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:813b4852e9d4f61ea9557ed27e2fdff2d06edc735e569fcb7cb0417c0e68b7bc + digest: sha256:38163ba23e846b6710480ba348e415d47082b5cd4880926e024c21e30f903eb9 - name: quay.io/openshift/origin-kube-rbac-proxy newName: quay.io/openshift-release-dev/ocp-v4.0-art-dev - digest: sha256:28ce4fb536ad0b695620ffcd026ff3284b6aee36821ffce0e291159d7ae78804 + digest: sha256:76f1ea773c20eeb9d3e07f98f522854c573c4f285193d643e904fedf0f766614 patches: - patch: |- @@ -16,12 +16,12 @@ patches: path: /spec/template/spec/containers/0/env/- value: name: OPERATOR_REGISTRY_IMAGE - value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:813b4852e9d4f61ea9557ed27e2fdff2d06edc735e569fcb7cb0417c0e68b7bc + value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:38163ba23e846b6710480ba348e415d47082b5cd4880926e024c21e30f903eb9 - op: add path: /spec/template/spec/containers/0/env/- value: name: OLM_IMAGE - value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:8f7ffbbc2881ab733a7380b809a7da79885d4ae4786b2d2e72a9184801caddf3 + value: quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:524b90fabbab585fab59f637700ab445ee08640aee67d714ae1fb2783af2cb3d target: kind: Deployment labelSelector: app=catalog-operator diff --git a/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json b/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json index fe93ad3b0a..d1b1877095 100644 --- a/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json +++ b/assets/optional/operator-lifecycle-manager/release-olm-aarch64.json @@ -1,6 +1,6 @@ { "release": { - "base": "4.22.0-0.nightly-arm64-2026-05-18-015115" + "base": "4.22.0-0.nightly-arm64-2026-05-18-195115" }, "images": { "operator-lifecycle-manager": 
"quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:197e653ef871c5df107f7899a64b0575bd33537d053f77ed062679c208d53835", diff --git a/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json b/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json index 5f6e8eb6eb..b58c781daf 100644 --- a/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json +++ b/assets/optional/operator-lifecycle-manager/release-olm-x86_64.json @@ -1,10 +1,10 @@ { "release": { - "base": "4.22.0-0.nightly-2026-05-13-154759" + "base": "4.22.0-0.nightly-2026-05-18-130120" }, "images": { - "operator-lifecycle-manager": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:8f7ffbbc2881ab733a7380b809a7da79885d4ae4786b2d2e72a9184801caddf3", - "operator-registry": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:813b4852e9d4f61ea9557ed27e2fdff2d06edc735e569fcb7cb0417c0e68b7bc", - "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:28ce4fb536ad0b695620ffcd026ff3284b6aee36821ffce0e291159d7ae78804" + "operator-lifecycle-manager": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:524b90fabbab585fab59f637700ab445ee08640aee67d714ae1fb2783af2cb3d", + "operator-registry": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:38163ba23e846b6710480ba348e415d47082b5cd4880926e024c21e30f903eb9", + "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:76f1ea773c20eeb9d3e07f98f522854c573c4f285193d643e904fedf0f766614" } } diff --git a/assets/release/release-aarch64.json b/assets/release/release-aarch64.json index 04cebb7e9f..525065fadb 100644 --- a/assets/release/release-aarch64.json +++ b/assets/release/release-aarch64.json @@ -1,6 +1,6 @@ { "release": { - "base": "4.22.0-0.nightly-arm64-2026-05-18-015115" + "base": "4.22.0-0.nightly-arm64-2026-05-18-195115" }, "images": { "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:88b77851bd769ebeecc9eaa52168e8f995002dcac011e16dfb2c18a44c1c92bc", diff --git 
a/assets/release/release-x86_64.json b/assets/release/release-x86_64.json index 2f5dd1666d..608d5315bd 100644 --- a/assets/release/release-x86_64.json +++ b/assets/release/release-x86_64.json @@ -1,16 +1,16 @@ { "release": { - "base": "4.22.0-0.nightly-2026-05-13-154759" + "base": "4.22.0-0.nightly-2026-05-18-130120" }, "images": { - "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:8762deff566a93d3d6fa9869748f50e22bbadf0e8979898803cc290ef5a91cd0", - "coredns": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:25272a0944e00a27ce1d63e2cc1dfba29b896b89890fe191f394f67b6e13ae05", - "haproxy-router": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:2f11b7a4f6cc23e213a30566cab93ac01d347a49aa40825feab57ba5010a66f6", - "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:28ce4fb536ad0b695620ffcd026ff3284b6aee36821ffce0e291159d7ae78804", - "ovn-kubernetes-microshift": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:f73f7295fcaf42ab336cd98782f802945dd1fac1771ca48f29bfaf7e1036ce46", - "pod": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:40cf94e00c8e84d6c613e38795d0742bffbec39264acfe450eefd7d26b75c784", - "service-ca-operator": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:51e4e2a749cfa068c41775652adbcb1ffad0a0264b6121e7590a9406cbc86574", + "cli": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:570e8cf68b924824188ac24a813c85b845d826d0b84989edfbceb14d78887608", + "coredns": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:a2f9930d88ba4077266d69229bb21c193386441bb7e3309eb2e6b1a75028c0af", + "haproxy-router": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:06458beb6ba34213c83f5c66fab748b647bbafc9930ce0e322d0e5a1da3867f0", + "kube-rbac-proxy": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:76f1ea773c20eeb9d3e07f98f522854c573c4f285193d643e904fedf0f766614", + "ovn-kubernetes-microshift": 
"quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:0bfafdd8708d19724c6ec44e84f572f507462e71ceb023433c6cf2265080b8a8", + "pod": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:f5958e314edca4fee07140d284af68c62fbbb4ae9379ee0504a1ea1018f04068", + "service-ca-operator": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:1b4f6ddffd62789e06d4d3e28501ed9b794d2a4eae139944eb6590a09839b861", "lvms_operator": "registry.redhat.io/lvms4/lvms-rhel9-operator@sha256:58804d8baf922927b66cec9424d431a3bdb341d207024ce40cc8f0123bac03ee", - "csi-snapshot-controller": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:02bdca6af3c8aa48ecfe1f24259cb24850e28b1614727e0e6abb7a47732d3237" + "csi-snapshot-controller": "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:88b5ed553911b7180f09027ce1ad5c4e5135bc969bad4c6720dc6ad967337e20" } } diff --git a/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.35.md b/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.35.md index 30e97b6315..6b38da9b9b 100644 --- a/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.35.md +++ b/deps/github.com/openshift/kubernetes/CHANGELOG/CHANGELOG-1.35.md @@ -1,177 +1,293 @@ -- [v1.35.3](#v1353) - - [Downloads for v1.35.3](#downloads-for-v1353) +- [v1.35.4](#v1354) + - [Downloads for v1.35.4](#downloads-for-v1354) - [Source Code](#source-code) - [Client Binaries](#client-binaries) - [Server Binaries](#server-binaries) - [Node Binaries](#node-binaries) - [Container Images](#container-images) - - [Changelog since v1.35.2](#changelog-since-v1352) + - [Changelog since v1.35.3](#changelog-since-v1353) - [Changes by Kind](#changes-by-kind) + - [Feature](#feature) - [Bug or Regression](#bug-or-regression) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake) - [Dependencies](#dependencies) - [Added](#added) - [Changed](#changed) - [Removed](#removed) -- [v1.35.2](#v1352) - - [Downloads for v1.35.2](#downloads-for-v1352) +- [v1.35.3](#v1353) + - [Downloads for 
v1.35.3](#downloads-for-v1353) - [Source Code](#source-code-1) - [Client Binaries](#client-binaries-1) - [Server Binaries](#server-binaries-1) - [Node Binaries](#node-binaries-1) - [Container Images](#container-images-1) - - [Changelog since v1.35.1](#changelog-since-v1351) + - [Changelog since v1.35.2](#changelog-since-v1352) - [Changes by Kind](#changes-by-kind-1) - - [Feature](#feature) + - [Bug or Regression](#bug-or-regression-1) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake) - [Dependencies](#dependencies-1) - [Added](#added-1) - [Changed](#changed-1) - [Removed](#removed-1) -- [v1.35.1](#v1351) - - [Downloads for v1.35.1](#downloads-for-v1351) +- [v1.35.2](#v1352) + - [Downloads for v1.35.2](#downloads-for-v1352) - [Source Code](#source-code-2) - [Client Binaries](#client-binaries-2) - [Server Binaries](#server-binaries-2) - [Node Binaries](#node-binaries-2) - [Container Images](#container-images-2) - - [Changelog since v1.35.0](#changelog-since-v1350) + - [Changelog since v1.35.1](#changelog-since-v1351) - [Changes by Kind](#changes-by-kind-2) - [Feature](#feature-1) - - [Bug or Regression](#bug-or-regression-1) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-1) - [Dependencies](#dependencies-2) - [Added](#added-2) - [Changed](#changed-2) - [Removed](#removed-2) -- [v1.35.0](#v1350) - - [Downloads for v1.35.0](#downloads-for-v1350) +- [v1.35.1](#v1351) + - [Downloads for v1.35.1](#downloads-for-v1351) - [Source Code](#source-code-3) - [Client Binaries](#client-binaries-3) - [Server Binaries](#server-binaries-3) - [Node Binaries](#node-binaries-3) - [Container Images](#container-images-3) - - [Changelog since v1.34.0](#changelog-since-v1340) - - [Urgent Upgrade Notes](#urgent-upgrade-notes) - - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade) + - [Changelog since v1.35.0](#changelog-since-v1350) - [Changes by Kind](#changes-by-kind-3) - - [Deprecation](#deprecation) - - [API 
Change](#api-change) - [Feature](#feature-2) - - [Documentation](#documentation) - [Bug or Regression](#bug-or-regression-2) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-2) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-1) - [Dependencies](#dependencies-3) - [Added](#added-3) - [Changed](#changed-3) - [Removed](#removed-3) -- [v1.35.0-rc.1](#v1350-rc1) - - [Downloads for v1.35.0-rc.1](#downloads-for-v1350-rc1) +- [v1.35.0](#v1350) + - [Downloads for v1.35.0](#downloads-for-v1350) - [Source Code](#source-code-4) - [Client Binaries](#client-binaries-4) - [Server Binaries](#server-binaries-4) - [Node Binaries](#node-binaries-4) - [Container Images](#container-images-4) - - [Changelog since v1.35.0-rc.0](#changelog-since-v1350-rc0) + - [Changelog since v1.34.0](#changelog-since-v1340) + - [Urgent Upgrade Notes](#urgent-upgrade-notes) + - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade) - [Changes by Kind](#changes-by-kind-4) + - [Deprecation](#deprecation) + - [API Change](#api-change) - [Feature](#feature-3) + - [Documentation](#documentation) - [Bug or Regression](#bug-or-regression-3) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-3) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-2) - [Dependencies](#dependencies-4) - [Added](#added-4) - [Changed](#changed-4) - [Removed](#removed-4) -- [v1.35.0-rc.0](#v1350-rc0) - - [Downloads for v1.35.0-rc.0](#downloads-for-v1350-rc0) +- [v1.35.0-rc.1](#v1350-rc1) + - [Downloads for v1.35.0-rc.1](#downloads-for-v1350-rc1) - [Source Code](#source-code-5) - [Client Binaries](#client-binaries-5) - [Server Binaries](#server-binaries-5) - [Node Binaries](#node-binaries-5) - [Container Images](#container-images-5) - - [Changelog since v1.35.0-beta.0](#changelog-since-v1350-beta0) + - [Changelog since v1.35.0-rc.0](#changelog-since-v1350-rc0) - [Changes by Kind](#changes-by-kind-5) - [Feature](#feature-4) - [Bug or Regression](#bug-or-regression-4) + 
- [Other (Cleanup or Flake)](#other-cleanup-or-flake-3) - [Dependencies](#dependencies-5) - [Added](#added-5) - [Changed](#changed-5) - [Removed](#removed-5) -- [v1.35.0-beta.0](#v1350-beta0) - - [Downloads for v1.35.0-beta.0](#downloads-for-v1350-beta0) +- [v1.35.0-rc.0](#v1350-rc0) + - [Downloads for v1.35.0-rc.0](#downloads-for-v1350-rc0) - [Source Code](#source-code-6) - [Client Binaries](#client-binaries-6) - [Server Binaries](#server-binaries-6) - [Node Binaries](#node-binaries-6) - [Container Images](#container-images-6) - - [Changelog since v1.35.0-alpha.3](#changelog-since-v1350-alpha3) + - [Changelog since v1.35.0-beta.0](#changelog-since-v1350-beta0) - [Changes by Kind](#changes-by-kind-6) - - [API Change](#api-change-1) - [Feature](#feature-5) - [Bug or Regression](#bug-or-regression-5) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-4) - [Dependencies](#dependencies-6) - [Added](#added-6) - [Changed](#changed-6) - [Removed](#removed-6) -- [v1.35.0-alpha.3](#v1350-alpha3) - - [Downloads for v1.35.0-alpha.3](#downloads-for-v1350-alpha3) +- [v1.35.0-beta.0](#v1350-beta0) + - [Downloads for v1.35.0-beta.0](#downloads-for-v1350-beta0) - [Source Code](#source-code-7) - [Client Binaries](#client-binaries-7) - [Server Binaries](#server-binaries-7) - [Node Binaries](#node-binaries-7) - [Container Images](#container-images-7) - - [Changelog since v1.35.0-alpha.2](#changelog-since-v1350-alpha2) - - [Urgent Upgrade Notes](#urgent-upgrade-notes-1) - - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-1) + - [Changelog since v1.35.0-alpha.3](#changelog-since-v1350-alpha3) - [Changes by Kind](#changes-by-kind-7) - - [API Change](#api-change-2) + - [API Change](#api-change-1) - [Feature](#feature-6) - [Bug or Regression](#bug-or-regression-6) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-5) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-4) - [Dependencies](#dependencies-7) - [Added](#added-7) 
- [Changed](#changed-7) - [Removed](#removed-7) -- [v1.35.0-alpha.2](#v1350-alpha2) - - [Downloads for v1.35.0-alpha.2](#downloads-for-v1350-alpha2) +- [v1.35.0-alpha.3](#v1350-alpha3) + - [Downloads for v1.35.0-alpha.3](#downloads-for-v1350-alpha3) - [Source Code](#source-code-8) - [Client Binaries](#client-binaries-8) - [Server Binaries](#server-binaries-8) - [Node Binaries](#node-binaries-8) - [Container Images](#container-images-8) - - [Changelog since v1.35.0-alpha.1](#changelog-since-v1350-alpha1) + - [Changelog since v1.35.0-alpha.2](#changelog-since-v1350-alpha2) + - [Urgent Upgrade Notes](#urgent-upgrade-notes-1) + - [(No, really, you MUST read this before you upgrade)](#no-really-you-must-read-this-before-you-upgrade-1) - [Changes by Kind](#changes-by-kind-8) - - [Deprecation](#deprecation-1) - - [API Change](#api-change-3) + - [API Change](#api-change-2) - [Feature](#feature-7) - - [Documentation](#documentation-1) - [Bug or Regression](#bug-or-regression-7) - - [Other (Cleanup or Flake)](#other-cleanup-or-flake-6) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-5) - [Dependencies](#dependencies-8) - [Added](#added-8) - [Changed](#changed-8) - [Removed](#removed-8) -- [v1.35.0-alpha.1](#v1350-alpha1) - - [Downloads for v1.35.0-alpha.1](#downloads-for-v1350-alpha1) +- [v1.35.0-alpha.2](#v1350-alpha2) + - [Downloads for v1.35.0-alpha.2](#downloads-for-v1350-alpha2) - [Source Code](#source-code-9) - [Client Binaries](#client-binaries-9) - [Server Binaries](#server-binaries-9) - [Node Binaries](#node-binaries-9) - [Container Images](#container-images-9) - - [Changelog since v1.34.0](#changelog-since-v1340-1) + - [Changelog since v1.35.0-alpha.1](#changelog-since-v1350-alpha1) - [Changes by Kind](#changes-by-kind-9) - - [API Change](#api-change-4) + - [Deprecation](#deprecation-1) + - [API Change](#api-change-3) - [Feature](#feature-8) + - [Documentation](#documentation-1) - [Bug or Regression](#bug-or-regression-8) - - [Other (Cleanup or 
Flake)](#other-cleanup-or-flake-7) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-6) - [Dependencies](#dependencies-9) - [Added](#added-9) - [Changed](#changed-9) - [Removed](#removed-9) +- [v1.35.0-alpha.1](#v1350-alpha1) + - [Downloads for v1.35.0-alpha.1](#downloads-for-v1350-alpha1) + - [Source Code](#source-code-10) + - [Client Binaries](#client-binaries-10) + - [Server Binaries](#server-binaries-10) + - [Node Binaries](#node-binaries-10) + - [Container Images](#container-images-10) + - [Changelog since v1.34.0](#changelog-since-v1340-1) + - [Changes by Kind](#changes-by-kind-10) + - [API Change](#api-change-4) + - [Feature](#feature-9) + - [Bug or Regression](#bug-or-regression-9) + - [Other (Cleanup or Flake)](#other-cleanup-or-flake-7) + - [Dependencies](#dependencies-10) + - [Added](#added-10) + - [Changed](#changed-10) + - [Removed](#removed-10) +# v1.35.4 + + +## Downloads for v1.35.4 + + + +### Source Code + +filename | sha512 hash +-------- | ----------- +[kubernetes.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes.tar.gz) | 8b21c3557a04eae7716e0de3859ecb2137f7ca7f6537b73dc59cc5aad17ac5c949c12e998516f1ce13decebdecc5ba7f56c3f02d86e8dc3aade3ddc58b85f5a6 +[kubernetes-src.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-src.tar.gz) | 3812dcf80e29384f01aa6636a01f0ab10f5e4b5a3e0f8c66ac00266897b893558e388a5de8ee4a5f4787a8125957a8e9a805978f78d907e3ce5b030ae053f033 + +### Client Binaries + +filename | sha512 hash +-------- | ----------- +[kubernetes-client-darwin-amd64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-darwin-amd64.tar.gz) | b0e899146289de21ffec7b432473e8905f52bf9cd75f3977c6bcbb2810c72aa36d90cc671bec18a4999ff194b9b3dec69e33d55e9b8b4e44e59ea39141d5c0a0 +[kubernetes-client-darwin-arm64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-darwin-arm64.tar.gz) | f81537e0516238a58e859efe875bde9d37219a3efdf5e34cd50b4455eb37fbdd6af43d1e6549180071d4fbfeb87c74830888bbc8f1d555b8140067ca8766c8a9 
+[kubernetes-client-linux-386.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-linux-386.tar.gz) | 561711306eb1011193b528eca949109889b8dff460fd1ebbdbd2450936e5e9b711395c69f572a413276978db8fbde2e29e957af7889d7ea035f62ee68b52ba57 +[kubernetes-client-linux-amd64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-linux-amd64.tar.gz) | 7c67019d82c7c70692948cd5843ae39c2854007cc117d6fa52fbd46b7521c7f7a10b6ec3085b68f021243529732d6af76b936406aba6c612e6b43e945bc7286b +[kubernetes-client-linux-arm.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-linux-arm.tar.gz) | 787308fa41ab5ebeb3b2854fc0e2d46d3bbe5edda057f8c04ed332cae62e3148307945f3336bb5e65754b7bf363dae31fb59c994a59268559a927fe369b27ab1 +[kubernetes-client-linux-arm64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-linux-arm64.tar.gz) | 5dbbccd0c964894f5dae9e622619e5cec11f14e39457c8343a55bfa7ee0a30ca4155553647d0bc7dd8022ac0066fe11e9c8518300f8bc65a5a0a5ad83dfaa59f +[kubernetes-client-linux-ppc64le.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-linux-ppc64le.tar.gz) | 838bcec6320889ce4188b47c64ef5cfdb09a3d3b36370b767bdb923c515ab6322216895fd2508b4164502b0659a6ca89ba60fb6505fa0d879d10eb97aae25ecd +[kubernetes-client-linux-s390x.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-linux-s390x.tar.gz) | 20c18f8b0e0154d1f775d47f9842f51aa4f13ee695759be78b12fcb9d131408ce2790fababdf914283542cc4b53441f3ec9b9ba983fc94d722394c17ca764a66 +[kubernetes-client-windows-386.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-windows-386.tar.gz) | f9badf491bf799ead08ada6760882c183bc15e5ebc9435cf84281d1e9e90c23105be89929a358a5abea8340d2cc3d409cb449ecfd7929a93b9b0e5c4050deba9 +[kubernetes-client-windows-amd64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-windows-amd64.tar.gz) | 688f2c84e29fdab1bbc6e92219eccac5c738a5663b2596d0c76a7632d206e47b0351c5dcac744e9360bcefcddddbd495f5455025bdaa424981ddeb1f498cb45b +[kubernetes-client-windows-arm64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-client-windows-arm64.tar.gz) | 
a9f4346b190bc0e5bb3f8055ae4573772b7a6862edb45ec5fcb26d15af4cd6fafe06c728e13a9876de4b8459036fe08c5e25e890fe2dbdfd9bb76816ee11a75f + +### Server Binaries + +filename | sha512 hash +-------- | ----------- +[kubernetes-server-linux-amd64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-server-linux-amd64.tar.gz) | 7454c2d9023de3230972e965e1e2a587a291e2a1abee703bb0737057e1646b23ebac538b93dd778e93579e3c293dc1fa8d002e6848ad5ebc01598bc47358fe44 +[kubernetes-server-linux-arm64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-server-linux-arm64.tar.gz) | 67187a2fe780be7816f9d10e082ed02cbba306bab379980280d00ba4c4017ff53c50cd2ff81d248c91ddcc064276b8760e17c73a1a7da1d35067a6fc0ffbdc88 +[kubernetes-server-linux-ppc64le.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-server-linux-ppc64le.tar.gz) | 406dbac05507565398c6aeffff9d2e89df00ae8766e3c54ed6608ed8807a9d1746667c3ba7b6af2e156845cd77ff1f7145860fb6c7034c26c55a708008a81c37 +[kubernetes-server-linux-s390x.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-server-linux-s390x.tar.gz) | a901947d0195dff85668739327233e846b8ad27fd7e17336cd319213317b6840674ab92601a4ac99edf547cca1b39ffda8f9e5cbf4de077bc5b2e397a69e21ff + +### Node Binaries + +filename | sha512 hash +-------- | ----------- +[kubernetes-node-linux-amd64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-node-linux-amd64.tar.gz) | bfdde7cb0eb382d28b37a499d041a210a10afc86280b1d0eefeec179e8563b8848f744ad0302a239662e3b5219622cdc2925a69c9d8098d7cd6237c659d78932 +[kubernetes-node-linux-arm64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-node-linux-arm64.tar.gz) | 754ed5fc01a1675b7aad31feb4b4a68ebde1d63411e2565249cac9cc7b57631652808f3ec2fcc3530c9a5b2dbd39cef305b4f286c39f35e70ede616458312b3b +[kubernetes-node-linux-ppc64le.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-node-linux-ppc64le.tar.gz) | b6c31f757863ffb37ba030f75f1b323c6e7d18f0106c78219e2ad08ecfbc9768e89957373be4a36d5aa04030b7d48abb259dbe5d7a44d3c3d28011724d47cc96 
+[kubernetes-node-linux-s390x.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-node-linux-s390x.tar.gz) | 65684796f3411a3cd90198c6fced6f2a09e75b289588b9ac43998b999dd94831412626eacd5276e298abf287f572bfffe19e178bd3b5c8d405d789ff403b489c +[kubernetes-node-windows-amd64.tar.gz](https://dl.k8s.io/v1.35.4/kubernetes-node-windows-amd64.tar.gz) | 407f2167d8c9a5e1ba98f1db0e8f2be01b4cbfc6e46626ff8886884c64e1846ed5883a50123fa29f5b03e9dabe74eddaad84239465504ede4ee67f8f131dce3f + +### Container Images + +All container images are available as manifest lists and support the described +architectures. It is also possible to pull a specific architecture directly by +adding the "-$ARCH" suffix to the container image name. + +name | architectures +---- | ------------- +[registry.k8s.io/conformance:v1.35.4](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/conformance-s390x) +[registry.k8s.io/kube-apiserver:v1.35.4](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-ppc64le), 
[s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-apiserver-s390x) +[registry.k8s.io/kube-controller-manager:v1.35.4](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-controller-manager-s390x) +[registry.k8s.io/kube-proxy:v1.35.4](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-proxy-s390x) +[registry.k8s.io/kube-scheduler:v1.35.4](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-arm64), 
[ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kube-scheduler-s390x) +[registry.k8s.io/kubectl:v1.35.4](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl) | [amd64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-amd64), [arm64](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-arm64), [ppc64le](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-ppc64le), [s390x](https://console.cloud.google.com/artifacts/docker/k8s-artifacts-prod/southamerica-east1/images/kubectl-s390x) + +## Changelog since v1.35.3 + +## Changes by Kind + +### Feature + +- Kubernetes is now built using Go 1.25.9 ([#138304](https://github.com/kubernetes/kubernetes/pull/138304), [@xmudrii](https://github.com/xmudrii)) [SIG Release and Testing] + +### Bug or Regression + +- Fixed a bug where, after a kubelet restart, regular containers in a pod with a + sidecar (initContainer with restartPolicy: Always) and a startupProbe failed + to restart after crashing. Affected pods remained stuck with RestartCount: 0 + indefinitely. ([#137885](https://github.com/kubernetes/kubernetes/pull/137885), [@HirazawaUi](https://github.com/HirazawaUi)) [SIG Node and Testing] +- Fixed device plugin test failures after kubelet restart. 
([#138042](https://github.com/kubernetes/kubernetes/pull/138042), [@zxqlxy](https://github.com/zxqlxy)) [SIG Node and Testing] +- Fixes a 1.34+ regression reporting apiserver request latency annotation in the audit log when request took more than 500ms ([#136281](https://github.com/kubernetes/kubernetes/pull/136281), [@chaochn47](https://github.com/chaochn47)) [SIG API Machinery] +- Fixes a 1.35 regression in StatefulSet Parallel pod management by disabling the MaxUnavailableStatefulSet feature by default. ([#137926](https://github.com/kubernetes/kubernetes/pull/137926), [@soltysh](https://github.com/soltysh)) [SIG Apps] +- Fixes kube-proxy's nftables mode to work on systems with nft 1.1.3. ([#137807](https://github.com/kubernetes/kubernetes/pull/137807), [@danwinship](https://github.com/danwinship)) [SIG Network] + +## Dependencies + +### Added +_Nothing has changed._ + +### Changed +- github.com/moby/spdystream: [v0.5.0 → v0.5.1](https://github.com/moby/spdystream/compare/v0.5.0...v0.5.1) +- go.opentelemetry.io/auto/sdk: [v1.1.0 → v1.2.1](https://github.com/open-telemetry/opentelemetry-go-instrumentation/compare/sdk/v1.1.0...sdk/v1.2.1) +- go.opentelemetry.io/otel: [v1.36.0 → v1.41.0](https://github.com/open-telemetry/opentelemetry-go/compare/a85ae98dcedc0761078518a715dea53e519b4846...v1.41.0) +- go.opentelemetry.io/otel/metric: [v1.36.0 → v1.41.0](https://github.com/open-telemetry/opentelemetry-go/compare/metric/v1.36.0...metric/v1.41.0) +- go.opentelemetry.io/otel/trace: [v1.36.0 → v1.41.0](https://github.com/open-telemetry/opentelemetry-go/compare/trace/v1.36.0...trace/v1.41.0) +- sigs.k8s.io/knftables: [v0.0.17 → v0.0.21](https://github.com/kubernetes-sigs/knftables/compare/v0.0.17...v0.0.21) + +### Removed +_Nothing has changed._ + + + # v1.35.3 diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/init.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/init.go index a52b2952cf..8ec1aa7cef 100644 --- 
a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/init.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/init.go @@ -19,11 +19,9 @@ package cmd import ( "fmt" "io" - "net" "os" "path/filepath" "slices" - "strconv" "github.com/spf13/cobra" flag "github.com/spf13/pflag" @@ -93,6 +91,7 @@ type initData struct { skipTokenPrint bool dryRun bool kubeconfig *clientcmdapi.Config + kubeconfigOriginal *clientcmdapi.Config kubeconfigDir string kubeconfigPath string ignorePreflightErrors sets.Set[string] @@ -463,6 +462,9 @@ func (d *initData) CertificateDir() string { } // KubeConfig returns a kubeconfig after loading it from KubeConfigPath(). +// If the default kubeconfig path is used (admin.conf), instead of constructing +// a kubeconfig that points to the control plane endpoint, make it point to the localAPIEndpoint. +// This would allow 'kubeadm init' to only talk to the local kube-apiserver instance. func (d *initData) KubeConfig() (*clientcmdapi.Config, error) { if d.kubeconfig != nil { return d.kubeconfig, nil @@ -473,10 +475,26 @@ func (d *initData) KubeConfig() (*clientcmdapi.Config, error) { if err != nil { return nil, err } + d.kubeconfigOriginal = d.kubeconfig.DeepCopy() + + if d.kubeconfigPath == kubeadmconstants.GetAdminKubeConfigPath() { + kubeconfigutil.PointKubeConfigToLocalAPIEndpoint(d.kubeconfig, &d.Cfg().LocalAPIEndpoint) + } return d.kubeconfig, nil } +// KubeConfigOriginal returns the original kubeconfig loaded from file, without any modifications. +func (d *initData) KubeConfigOriginal() (*clientcmdapi.Config, error) { + if d.kubeconfigOriginal == nil { + if _, err := d.KubeConfig(); err != nil { + return nil, err + } + } + + return d.kubeconfigOriginal, nil +} + // KubeConfigDir returns the Kubernetes configuration directory or the temporary directory if DryRun is true. 
func (d *initData) KubeConfigDir() string { if d.dryRun { @@ -521,8 +539,12 @@ func (d *initData) OutputWriter() io.Writer { // getDryRunClient creates a fake client that answers some GET calls in order to be able to do the full init flow in dry-run mode. func getDryRunClient(d *initData) (clientset.Interface, error) { + kubeconfig, err := d.KubeConfig() + if err != nil { + return nil, err + } dryRun := apiclient.NewDryRun() - if err := dryRun.WithKubeConfigFile(d.KubeConfigPath()); err != nil { + if err := dryRun.WithKubeConfig(kubeconfig); err != nil { return nil, err } dryRun.WithDefaultMarshalFunction(). @@ -550,7 +572,11 @@ func (d *initData) Client() (clientset.Interface, error) { // and if the bootstrapping was not already done if !d.adminKubeConfigBootstrapped && isDefaultKubeConfigPath { // Call EnsureAdminClusterRoleBinding() to obtain a working client from admin.conf. - d.client, err = kubeconfigphase.EnsureAdminClusterRoleBinding(kubeadmconstants.KubernetesDir, nil) + d.client, err = kubeconfigphase.EnsureAdminClusterRoleBinding( + kubeadmconstants.KubernetesDir, + &d.Cfg().LocalAPIEndpoint, + nil, + ) if err != nil { return nil, errors.Wrapf(err, "could not bootstrap the admin user in file %s", kubeadmconstants.AdminKubeConfigFileName) } @@ -571,30 +597,6 @@ func (d *initData) Client() (clientset.Interface, error) { return d.client, nil } -// WaitControlPlaneClient returns a basic client used for the purpose of waiting -// for control plane components to report 'ok' on their respective health check endpoints. -// It uses the admin.conf as the base, but modifies it to point at the local API server instead -// of the control plane endpoint. 
-func (d *initData) WaitControlPlaneClient() (clientset.Interface, error) { - config, err := clientcmd.LoadFromFile(d.KubeConfigPath()) - if err != nil { - return nil, err - } - for _, v := range config.Clusters { - v.Server = fmt.Sprintf("https://%s", - net.JoinHostPort( - d.Cfg().LocalAPIEndpoint.AdvertiseAddress, - strconv.Itoa(int(d.Cfg().LocalAPIEndpoint.BindPort)), - ), - ) - } - client, err := kubeconfigutil.ToClientSet(config) - if err != nil { - return nil, err - } - return client, nil -} - // Tokens returns an array of token strings. func (d *initData) Tokens() []string { tokens := []string{} diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go index 5fbef8bd5f..2a103b9aeb 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/bootstraptoken.go @@ -71,7 +71,7 @@ func runBootstrapToken(c workflow.RunData) error { if err != nil { return err } - kubeconfig, err := data.KubeConfig() + kubeconfig, err := data.KubeConfigOriginal() if err != nil { return err } @@ -108,6 +108,11 @@ func runBootstrapToken(c workflow.RunData) error { return err } + // Create RBAC rules that allow the API server kubelet client to access the kubelet API + if err := nodebootstraptokenphase.AllowAPIServerToAccessKubeletAPI(client); err != nil { + return errors.Wrap(err, "error allowing API server to access kubelet API") + } + // Create the cluster-info ConfigMap with the associated RBAC rules if err := clusterinfophase.CreateBootstrapConfigMapIfNotExists(client, kubeconfig); err != nil { return errors.Wrap(err, "error creating bootstrap ConfigMap") diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data.go index 
6e7b8ba3a6..23f10825cd 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data.go @@ -40,6 +40,7 @@ type InitData interface { CertificateWriteDir() string CertificateDir() string KubeConfig() (*clientcmdapi.Config, error) + KubeConfigOriginal() (*clientcmdapi.Config, error) KubeConfigDir() string KubeConfigPath() string ManifestDir() string @@ -47,7 +48,6 @@ type InitData interface { ExternalCA() bool OutputWriter() io.Writer Client() (clientset.Interface, error) - WaitControlPlaneClient() (clientset.Interface, error) Tokens() []string PatchesDir() string } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data_test.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data_test.go index 8465021446..7cafb4cbcc 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data_test.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/data_test.go @@ -32,24 +32,24 @@ type testInitData struct{} // testInitData must satisfy InitData. 
var _ InitData = &testInitData{} -func (t *testInitData) UploadCerts() bool { return false } -func (t *testInitData) CertificateKey() string { return "" } -func (t *testInitData) SetCertificateKey(key string) {} -func (t *testInitData) SkipCertificateKeyPrint() bool { return false } -func (t *testInitData) Cfg() *kubeadmapi.InitConfiguration { return nil } -func (t *testInitData) DryRun() bool { return false } -func (t *testInitData) SkipTokenPrint() bool { return false } -func (t *testInitData) IgnorePreflightErrors() sets.Set[string] { return nil } -func (t *testInitData) CertificateWriteDir() string { return "" } -func (t *testInitData) CertificateDir() string { return "" } -func (t *testInitData) KubeConfig() (*clientcmdapi.Config, error) { return nil, nil } -func (t *testInitData) KubeConfigDir() string { return "" } -func (t *testInitData) KubeConfigPath() string { return "" } -func (t *testInitData) ManifestDir() string { return "" } -func (t *testInitData) KubeletDir() string { return "" } -func (t *testInitData) ExternalCA() bool { return false } -func (t *testInitData) OutputWriter() io.Writer { return nil } -func (t *testInitData) Client() (clientset.Interface, error) { return nil, nil } -func (t *testInitData) WaitControlPlaneClient() (clientset.Interface, error) { return nil, nil } -func (t *testInitData) Tokens() []string { return nil } -func (t *testInitData) PatchesDir() string { return "" } +func (t *testInitData) UploadCerts() bool { return false } +func (t *testInitData) CertificateKey() string { return "" } +func (t *testInitData) SetCertificateKey(key string) {} +func (t *testInitData) SkipCertificateKeyPrint() bool { return false } +func (t *testInitData) Cfg() *kubeadmapi.InitConfiguration { return nil } +func (t *testInitData) DryRun() bool { return false } +func (t *testInitData) SkipTokenPrint() bool { return false } +func (t *testInitData) IgnorePreflightErrors() sets.Set[string] { return nil } +func (t *testInitData) 
CertificateWriteDir() string { return "" } +func (t *testInitData) CertificateDir() string { return "" } +func (t *testInitData) KubeConfig() (*clientcmdapi.Config, error) { return nil, nil } +func (t *testInitData) KubeConfigOriginal() (*clientcmdapi.Config, error) { return nil, nil } +func (t *testInitData) KubeConfigDir() string { return "" } +func (t *testInitData) KubeConfigPath() string { return "" } +func (t *testInitData) ManifestDir() string { return "" } +func (t *testInitData) KubeletDir() string { return "" } +func (t *testInitData) ExternalCA() bool { return false } +func (t *testInitData) OutputWriter() io.Writer { return nil } +func (t *testInitData) Client() (clientset.Interface, error) { return nil, nil } +func (t *testInitData) Tokens() []string { return nil } +func (t *testInitData) PatchesDir() string { return "" } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go index c93d02aef1..db68251a2d 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/init/waitcontrolplane.go @@ -58,7 +58,7 @@ func runWaitControlPlanePhase(c workflow.RunData) error { } } - client, err := data.WaitControlPlaneClient() + client, err := data.Client() if err != nil { return errors.Wrap(err, "cannot obtain client without bootstrap") } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/upgrade/apply/bootstraptoken.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/upgrade/apply/bootstraptoken.go index e0c40c09e4..58aec1ddc2 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/upgrade/apply/bootstraptoken.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/cmd/phases/upgrade/apply/bootstraptoken.go @@ -79,6 +79,11 @@ func runBootstrapToken(c 
workflow.RunData) error { errs = append(errs, err) } + // Create/update RBAC rules that allow the API server kubelet client to access the kubelet API + if err := nodebootstraptoken.AllowAPIServerToAccessKubeletAPI(client); err != nil { + errs = append(errs, err) + } + // Create/update RBAC rules that makes the cluster-info ConfigMap reachable if err := clusterinfophase.CreateClusterInfoRBACRules(client); err != nil { errs = append(errs, err) diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/constants/constants.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/constants/constants.go index fee98ece17..e17e14331a 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/constants/constants.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/constants/constants.go @@ -210,6 +210,11 @@ const ( // built-in ClusterRole. ClusterAdminsGroupAndClusterRoleBinding = "kubeadm:cluster-admins" + // KubeletAPIAdminClusterRoleBindingName is the name of the ClusterRoleBinding for the apiserver kubelet client + KubeletAPIAdminClusterRoleBindingName = "kubeadm:apiserver-kubelet-client" + // KubeletAPIAdminClusterRoleName is the name of the built-in ClusterRole for kubelet API access + KubeletAPIAdminClusterRoleName = "system:kubelet-api-admin" + // KubernetesAPICallTimeout specifies how long kubeadm should wait for API calls KubernetesAPICallTimeout = 1 * time.Minute // KubernetesAPICallRetryInterval defines how long kubeadm should wait before retrying a failed API operation diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go index 72154c9ecd..8485e3779a 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap.go @@ -130,3 +130,25 @@ func 
AutoApproveNodeCertificateRotation(client clientset.Interface) error { }, }) } + +// AllowAPIServerToAccessKubeletAPI creates RBAC rules that allow the API server kubelet client to access the kubelet API +func AllowAPIServerToAccessKubeletAPI(client clientset.Interface) error { + fmt.Println("[bootstrap-token] Configured RBAC rules to allow the API server kubelet client certificate to access the kubelet API") + + return apiclient.CreateOrUpdate(client.RbacV1().ClusterRoleBindings(), &rbac.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.KubeletAPIAdminClusterRoleBindingName, + }, + RoleRef: rbac.RoleRef{ + APIGroup: rbac.GroupName, + Kind: "ClusterRole", + Name: constants.KubeletAPIAdminClusterRoleName, + }, + Subjects: []rbac.Subject{ + { + Kind: rbac.UserKind, + Name: constants.APIServerKubeletClientCertCommonName, + }, + }, + }) +} diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap_test.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap_test.go index 1611410569..7a75fdeb1c 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap_test.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/bootstraptoken/node/tlsbootstrap_test.go @@ -278,6 +278,63 @@ func TestAllowBootstrapTokensToGetNodes(t *testing.T) { } } +func TestAllowAPIServerToAccessKubeletAPI(t *testing.T) { + tests := []struct { + name string + client clientset.Interface + }{ + { + name: "ClusterRoleBindings is empty", + client: clientsetfake.NewSimpleClientset(), + }, + { + name: "ClusterRoleBindings already exists", + client: newMockClusterRoleBinddingClientForTest(t, &rbac.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.KubeletAPIAdminClusterRoleBindingName, + }, + RoleRef: rbac.RoleRef{ + APIGroup: rbac.GroupName, + Kind: "ClusterRole", + Name: constants.KubeletAPIAdminClusterRoleName, + }, + 
Subjects: []rbac.Subject{ + { + Kind: rbac.UserKind, + Name: constants.APIServerKubeletClientCertCommonName, + }, + }, + }), + }, + { + name: "Create new ClusterRoleBindings", + client: newMockClusterRoleBinddingClientForTest(t, &rbac.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.KubeletAPIAdminClusterRoleBindingName, + }, + RoleRef: rbac.RoleRef{ + APIGroup: rbac.GroupName, + Kind: "ClusterRole", + Name: constants.KubeletAPIAdminClusterRoleName, + }, + Subjects: []rbac.Subject{ + { + Kind: rbac.GroupKind, + Name: constants.NodesGroup, + }, + }, + }), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := AllowAPIServerToAccessKubeletAPI(tt.client); err != nil { + t.Errorf("AllowAPIServerToAccessKubeletAPI() return error = %v", err) + } + }) + } +} + func newMockClusterRoleBinddingClientForTest(t *testing.T, clusterRoleBinding *rbac.ClusterRoleBinding) *clientsetfake.Clientset { client := clientsetfake.NewSimpleClientset() _, err := client.RbacV1().ClusterRoleBindings().Create(context.TODO(), clusterRoleBinding, metav1.CreateOptions{}) diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/certs/certlist.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/certs/certlist.go index 36caff1d92..11f773e359 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/certs/certlist.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/certs/certlist.go @@ -317,9 +317,8 @@ func KubeadmCertKubeletClient() *KubeadmCert { CAName: "ca", config: pkiutil.CertConfig{ Config: certutil.Config{ - CommonName: kubeadmconstants.APIServerKubeletClientCertCommonName, - Organization: []string{kubeadmconstants.ClusterAdminsGroupAndClusterRoleBinding}, - Usages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, + CommonName: kubeadmconstants.APIServerKubeletClientCertCommonName, + Usages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, }, }, } diff --git 
a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig.go index 09ef132cf0..1aa01bfd58 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig.go @@ -615,14 +615,19 @@ type EnsureRBACFunc func(context.Context, clientset.Interface, clientset.Interfa // constructs a client from super-admin.conf if the file exists. It then proceeds // to pass the clients to EnsureAdminClusterRoleBindingImpl. The function returns a // usable client from admin.conf with RBAC properly constructed or an error. -func EnsureAdminClusterRoleBinding(outDir string, ensureRBACFunc EnsureRBACFunc) (clientset.Interface, error) { +func EnsureAdminClusterRoleBinding(outDir string, lae *kubeadmapi.APIEndpoint, ensureRBACFunc EnsureRBACFunc) (clientset.Interface, error) { var ( err error adminClient, superAdminClient clientset.Interface ) // Create a client from admin.conf. - adminClient, err = kubeconfigutil.ClientSetFromFile(filepath.Join(outDir, kubeadmconstants.AdminKubeConfigFileName)) + kubeconfig, err := clientcmd.LoadFromFile(filepath.Join(outDir, kubeadmconstants.AdminKubeConfigFileName)) + if err != nil { + return nil, err + } + kubeconfigutil.PointKubeConfigToLocalAPIEndpoint(kubeconfig, lae) + adminClient, err = kubeconfigutil.ToClientSet(kubeconfig) if err != nil { return nil, err } @@ -630,7 +635,12 @@ func EnsureAdminClusterRoleBinding(outDir string, ensureRBACFunc EnsureRBACFunc) // Create a client from super-admin.conf. 
superAdminPath := filepath.Join(outDir, kubeadmconstants.SuperAdminKubeConfigFileName) if _, err := os.Stat(superAdminPath); err == nil { - superAdminClient, err = kubeconfigutil.ClientSetFromFile(superAdminPath) + kubeconfig, err := clientcmd.LoadFromFile(superAdminPath) + if err != nil { + return nil, err + } + kubeconfigutil.PointKubeConfigToLocalAPIEndpoint(kubeconfig, lae) + superAdminClient, err = kubeconfigutil.ToClientSet(kubeconfig) if err != nil { return nil, err } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig_test.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig_test.go index b0722ec77c..7fad218363 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig_test.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/phases/kubeconfig/kubeconfig_test.go @@ -971,7 +971,7 @@ func TestEnsureAdminClusterRoleBinding(t *testing.T) { } } - client, err := EnsureAdminClusterRoleBinding(dir, ensureRBACFunc) + client, err := EnsureAdminClusterRoleBinding(dir, &kubeadmapi.APIEndpoint{}, ensureRBACFunc) if (err != nil) != tc.expectedError { t.Fatalf("expected error: %v, got: %v, error: %v", err != nil, tc.expectedError, err) } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/cluster.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/cluster.go index 3297c3447e..ebdb51c2ab 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/cluster.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/cluster.go @@ -66,9 +66,11 @@ func FetchInitConfigurationFromCluster(client clientset.Interface, printer outpu return nil, err } - // Apply dynamic defaults - // NB. skip CRI detection here because it won't be used at all and will be overridden later - if err := SetInitDynamicDefaults(cfg, true); err != nil { + // Apply dynamic defaults. + // NB. 
skip CRI detection here because it won't be used at all and will be overridden later. + // NB. skip LocalAPIEndpoint defaulting when the caller did not request the endpoint (e.g. a + // worker join). + if err := SetInitDynamicDefaults(cfg, true, !getAPIEndpoint); err != nil { return nil, err } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration.go index 94bb650c4d..da52e82149 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration.go @@ -56,16 +56,18 @@ var ( } ) -// SetInitDynamicDefaults checks and sets configuration values for the InitConfiguration object -func SetInitDynamicDefaults(cfg *kubeadmapi.InitConfiguration, skipCRIDetect bool) error { +// SetInitDynamicDefaults checks and sets configuration values for the InitConfiguration object. 
+func SetInitDynamicDefaults(cfg *kubeadmapi.InitConfiguration, skipCRIDetect, skipAPIEndpoint bool) error { if err := SetBootstrapTokensDynamicDefaults(&cfg.BootstrapTokens); err != nil { return err } if err := SetNodeRegistrationDynamicDefaults(&cfg.NodeRegistration, true, skipCRIDetect); err != nil { return err } - if err := SetAPIEndpointDynamicDefaults(&cfg.LocalAPIEndpoint); err != nil { - return err + if !skipAPIEndpoint { + if err := SetAPIEndpointDynamicDefaults(&cfg.LocalAPIEndpoint); err != nil { + return err + } } return SetClusterDynamicDefaults(&cfg.ClusterConfiguration, &cfg.LocalAPIEndpoint, &cfg.NodeRegistration) } @@ -245,7 +247,7 @@ func DefaultedInitConfiguration(versionedInitCfg *kubeadmapiv1.InitConfiguration } // Applies dynamic defaults to settings not provided with flags - if err := SetInitDynamicDefaults(internalcfg, opts.SkipCRIDetect); err != nil { + if err := SetInitDynamicDefaults(internalcfg, opts.SkipCRIDetect, false); err != nil { return nil, err } // Validates cfg (flags/configs + defaults + dynamic defaults) @@ -400,7 +402,7 @@ func documentMapToInitConfiguration(gvkmap kubeadmapi.DocumentMap, allowDeprecat } // Applies dynamic defaults to settings not provided with flags - if err := SetInitDynamicDefaults(initcfg, skipCRIDetect); err != nil { + if err := SetInitDynamicDefaults(initcfg, skipCRIDetect, false); err != nil { return nil, err } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration_test.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration_test.go index 8b95c4260d..c2322bd461 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration_test.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/config/initconfiguration_test.go @@ -314,3 +314,56 @@ func TestBytesToInitConfiguration(t *testing.T) { } } } + +func TestSetInitDynamicDefaultsSkipAPIEndpoint(t *testing.T) { + // "not-an-ip" is a sentinel 
that would cause SetAPIEndpointDynamicDefaults to return + // an error if invoked. With skipAPIEndpoint=true, the value must be left untouched. + const sentinel = "not-an-ip" + + tests := []struct { + name string + skipAPIEndpoint bool + expectErr bool + expectAdvertise string + }{ + { + name: "skip leaves AdvertiseAddress untouched", + skipAPIEndpoint: true, + expectErr: false, + expectAdvertise: sentinel, + }, + { + name: "no skip surfaces invalid AdvertiseAddress error", + skipAPIEndpoint: false, + expectErr: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cfg := &kubeadmapi.InitConfiguration{ + ClusterConfiguration: kubeadmapi.ClusterConfiguration{ + KubernetesVersion: constants.CurrentKubernetesVersion.String(), + }, + LocalAPIEndpoint: kubeadmapi.APIEndpoint{ + AdvertiseAddress: sentinel, + }, + } + + err := SetInitDynamicDefaults(cfg, true /* skipCRIDetect */, tc.skipAPIEndpoint) + if tc.expectErr { + if err == nil { + t.Fatalf("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.LocalAPIEndpoint.AdvertiseAddress != tc.expectAdvertise { + t.Errorf("AdvertiseAddress = %q, want %q", + cfg.LocalAPIEndpoint.AdvertiseAddress, tc.expectAdvertise) + } + }) + } +} diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go index 3ac966c70b..15d1ece982 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd.go @@ -23,6 +23,7 @@ import ( "net" "net/url" "path/filepath" + "slices" "strconv" "strings" "time" @@ -35,6 +36,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/wait" clientset "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -101,6 +103,13 @@ type 
Client struct { listMembersFunc func(timeout time.Duration) (*clientv3.MemberListResponse, error) } +type etcdMemberStatus struct { + ep string + status *clientv3.StatusResponse + // err is any error encountered while communicating with the etcd server. + err error +} + // New creates a new EtcdCluster client func New(endpoints []string, ca, cert, key string) (*Client, error) { client := Client{Endpoints: endpoints} @@ -535,7 +544,7 @@ func (c *Client) addMember(name string, peerAddrs string, isLearner bool) ([]Mem if !isLearner { // Add the new member client address to the list of endpoints - c.Endpoints = append(c.Endpoints, GetClientURLByIP(parsedPeerAddrs.Hostname())) + c.addEndpoint(GetClientURLByIP(parsedPeerAddrs.Hostname())) } return ret, nil @@ -617,11 +626,12 @@ func (c *Client) MemberPromote(learnerID uint64) error { // 2. context deadline exceeded // 3. peer URLs already exists // Once the client provides a way to check if the etcd learner is ready to promote, the retry logic can be revisited. 
+ var promoteResp *clientv3.MemberPromoteResponse err = wait.PollUntilContextTimeout(context.Background(), constants.EtcdAPICallRetryInterval, kubeadmapi.GetActiveTimeouts().EtcdAPICall.Duration, true, func(_ context.Context) (bool, error) { ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout) defer cancel() - _, err = cli.MemberPromote(ctx, learnerID) + promoteResp, err = cli.MemberPromote(ctx, learnerID) if err == nil { klog.V(1).Infof("[etcd] The learner was promoted as a voting member: %s", learnerIDUint) return true, nil @@ -633,18 +643,52 @@ func (c *Client) MemberPromote(learnerID uint64) error { if err != nil { return lastError } + + for _, m := range promoteResp.Members { + if m.ID == learnerID { + parsedPeerAddrs, err := url.Parse(m.PeerURLs[0]) + if err != nil { + return errors.Wrapf(err, "error parsing peer address %s", m.PeerURLs[0]) + } + c.addEndpoint(GetClientURLByIP(parsedPeerAddrs.Hostname())) + break + } + } + return nil } +func (c *Client) addEndpoint(ep string) { + if slices.Contains(c.Endpoints, ep) { + return + } + c.Endpoints = append(c.Endpoints, ep) +} + // CheckClusterHealth returns nil for status Up or error for status Down func (c *Client) CheckClusterHealth() error { - _, err := c.getClusterStatus() + _, ok, err := c.getClusterStatus() + if err != nil { + klog.V(1).Infof("[etcd] cluster has quorum: %t; some members are not healthy: %v\n", ok, err) + } + if ok { + return nil + } return err } -// getClusterStatus returns nil for status Up (along with endpoint status response map) or error for status Down -func (c *Client) getClusterStatus() (map[string]*clientv3.StatusResponse, error) { - clusterStatus := make(map[string]*clientv3.StatusResponse) +// getClusterStatus checks the health of the cluster members and returns +// their individual status map, whether cluster quorum is satisfied, and any +// aggregated member errors. 
+// +// The boolean result is true when a majority of members are healthy +// (healthyCount > totalCount/2). +// +// A member is considered unhealthy if its status request failed or if the +// reported status contains health errors. +func (c *Client) getClusterStatus() (map[string]*etcdMemberStatus, bool, error) { + // Step 1: get the cluster status first + clusterStatus := make(map[string]*etcdMemberStatus) for _, ep := range c.Endpoints { // Gets the member status var lastError error @@ -653,6 +697,7 @@ func (c *Client) getClusterStatus() (map[string]*clientv3.StatusResponse, error) true, func(_ context.Context) (bool, error) { cli, err := c.newEtcdClient(c.Endpoints) if err != nil { + klog.V(5).Infof("Failed to create etcd client with %v: %v", c.Endpoints, err) lastError = err return false, nil } @@ -669,15 +714,33 @@ func (c *Client) getClusterStatus() (map[string]*clientv3.StatusResponse, error) return false, nil }) if err != nil { - return nil, lastError + clusterStatus[ep] = &etcdMemberStatus{ep: ep, err: lastError} + } else { + clusterStatus[ep] = &etcdMemberStatus{ep: ep, status: resp} } + } + + // Step 2: evaluate the cluster status + totalCount, healthyCount := len(clusterStatus), 0 + var memberErrs []error - clusterStatus[ep] = resp + for ep, epStatus := range clusterStatus { + if epStatus.err != nil { + memberErrs = append(memberErrs, errors.Wrapf(epStatus.err, "the status of member %s is not available", ep)) + continue + } + if len(epStatus.status.Errors) > 0 { + memberErrs = append(memberErrs, errors.Errorf("member %s is not healthy: %s", ep, strings.Join(epStatus.status.Errors, ","))) + continue + } + healthyCount++ } - return clusterStatus, nil + + err := utilerrors.NewAggregate(memberErrs) + return clusterStatus, healthyCount > totalCount/2, err } -// WaitForClusterAvailable returns true if all endpoints in the cluster are available after retry attempts, an error is returned otherwise +// WaitForClusterAvailable returns true if the etcd cluster is 
healthy after retry attempts, otherwise returns an error. func (c *Client) WaitForClusterAvailable(retries int, retryInterval time.Duration) (bool, error) { for i := 0; i < retries; i++ { if i > 0 { @@ -685,17 +748,13 @@ func (c *Client) WaitForClusterAvailable(retries int, retryInterval time.Duratio time.Sleep(retryInterval) } klog.V(2).Infof("[etcd] attempting to see if all cluster endpoints (%s) are available %d/%d", c.Endpoints, i+1, retries) - _, err := c.getClusterStatus() + _, ok, err := c.getClusterStatus() if err != nil { - switch err { - case context.DeadlineExceeded: - klog.V(1).Infof("[etcd] Attempt timed out") - default: - klog.V(1).Infof("[etcd] Attempt failed with error: %v\n", err) - } - continue + klog.V(1).Infof("[etcd] cluster has quorum: %t; some members are not healthy: %v\n", ok, err) + } + if ok { + return true, nil } - return true, nil } return false, errors.New("timeout waiting for etcd cluster to be available") } diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd_test.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd_test.go index 67671a994d..cbf1a97463 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd_test.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/etcd/etcd_test.go @@ -28,6 +28,7 @@ import ( clientv3 "go.etcd.io/etcd/client/v3" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" clientsetfake "k8s.io/client-go/kubernetes/fake" clienttesting "k8s.io/client-go/testing" @@ -823,3 +824,144 @@ func TestGetMemberStatus(t *testing.T) { }) } } + +type fakeEtcdClientWithStatusResponse struct { + fakeEtcdClient + statusResponses map[string]*clientv3.StatusResponse + statusRequestErrors map[string]error +} + +// Status gets the status of the endpoint. 
+func (f *fakeEtcdClientWithStatusResponse) Status(_ context.Context, ep string) (*clientv3.StatusResponse, error) { + if f.statusRequestErrors != nil { + if _, ok := f.statusRequestErrors[ep]; ok { + return nil, f.statusRequestErrors[ep] + } + } + return f.statusResponses[ep], nil +} + +func TestEvaluateClusterStatus(t *testing.T) { + testCases := []struct { + name string + Endpoints []string + newEtcdClient func(endpoints []string) (etcdClient, error) + wantClusterHealthy bool + wantMemberErrors bool + }{ + { + name: "all the three members are healthy", + Endpoints: []string{"https://192.168.10.100:2379", "https://192.168.10.200:2379", "https://192.168.10.300:2379"}, + newEtcdClient: func(endpoints []string) (etcdClient, error) { + f := &fakeEtcdClientWithStatusResponse{ + statusResponses: map[string]*clientv3.StatusResponse{ + "https://192.168.10.100:2379": {}, + "https://192.168.10.200:2379": {}, + "https://192.168.10.300:2379": {}, + }, + } + return f, nil + }, + wantClusterHealthy: true, + wantMemberErrors: false, + }, + { + name: "one out of three members has errors", + Endpoints: []string{"https://192.168.10.100:2379", "https://192.168.10.200:2379", "https://192.168.10.300:2379"}, + newEtcdClient: func(endpoints []string) (etcdClient, error) { + f := &fakeEtcdClientWithStatusResponse{ + statusResponses: map[string]*clientv3.StatusResponse{ + "https://192.168.10.100:2379": {}, + "https://192.168.10.200:2379": {Errors: []string{"etcdserver: mvcc: database space exceeded"}}, + "https://192.168.10.300:2379": {}, + }, + } + return f, nil + }, + wantClusterHealthy: true, + wantMemberErrors: true, + }, + { + name: "one out of three members is unreachable", + Endpoints: []string{"https://192.168.10.100:2379", "https://192.168.10.200:2379", "https://192.168.10.300:2379"}, + newEtcdClient: func(endpoints []string) (etcdClient, error) { + f := &fakeEtcdClientWithStatusResponse{ + statusResponses: map[string]*clientv3.StatusResponse{ + "https://192.168.10.100:2379": 
{}, + "https://192.168.10.200:2379": {}, + "https://192.168.10.300:2379": {}, + }, + statusRequestErrors: map[string]error{ + "https://192.168.10.200:2379": errors.New("context deadline exceeded"), + }, + } + return f, nil + }, + wantClusterHealthy: true, + wantMemberErrors: true, + }, + { + name: "two out of three members has errors", + Endpoints: []string{"https://192.168.10.100:2379", "https://192.168.10.200:2379", "https://192.168.10.300:2379"}, + newEtcdClient: func(endpoints []string) (etcdClient, error) { + f := &fakeEtcdClientWithStatusResponse{ + statusResponses: map[string]*clientv3.StatusResponse{ + "https://192.168.10.100:2379": {}, + "https://192.168.10.200:2379": {Errors: []string{"etcdserver: mvcc: database space exceeded"}}, + "https://192.168.10.300:2379": {Errors: []string{"etcdserver: mvcc: data corrupted"}}, + }, + } + return f, nil + }, + wantClusterHealthy: false, + wantMemberErrors: true, + }, + { + name: "two out of three members are unreachable", + Endpoints: []string{"https://192.168.10.100:2379", "https://192.168.10.200:2379", "https://192.168.10.300:2379"}, + newEtcdClient: func(endpoints []string) (etcdClient, error) { + f := &fakeEtcdClientWithStatusResponse{ + statusResponses: map[string]*clientv3.StatusResponse{ + "https://192.168.10.100:2379": {}, + "https://192.168.10.200:2379": {}, + "https://192.168.10.300:2379": {}, + }, + statusRequestErrors: map[string]error{ + "https://192.168.10.200:2379": errors.New("context deadline exceeded"), + "https://192.168.10.300:2379": errors.New("context deadline exceeded"), + }, + } + return f, nil + }, + wantClusterHealthy: false, + wantMemberErrors: true, + }, + } + + // Temporarily reduce the etcd API call timeout from 2 minutes to 1 second. 
+ oldActiveTimeout := kubeadmapi.GetActiveTimeouts() + newActiveTimeout := oldActiveTimeout.DeepCopy() + newActiveTimeout.EtcdAPICall = &metav1.Duration{Duration: 1 * time.Second} + kubeadmapi.SetActiveTimeouts(newActiveTimeout) + defer func() { + kubeadmapi.SetActiveTimeouts(oldActiveTimeout) + }() + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + c := &Client{ + Endpoints: tc.Endpoints, + newEtcdClient: tc.newEtcdClient, + } + _, gotClusterHealthy, err := c.getClusterStatus() + + if gotClusterHealthy != tc.wantClusterHealthy { + t.Errorf("gotClusterHealthy = %t, want = %t", gotClusterHealthy, tc.wantClusterHealthy) + } + + if tc.wantMemberErrors != (err != nil) { + t.Errorf("gotMemberErrors = %v, wantMemberErrors = %t", err, tc.wantMemberErrors) + } + }) + } +} diff --git a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/kubeconfig/kubeconfig.go b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/kubeconfig/kubeconfig.go index 1b94364572..3ab63468b3 100644 --- a/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/kubeconfig/kubeconfig.go +++ b/deps/github.com/openshift/kubernetes/cmd/kubeadm/app/util/kubeconfig/kubeconfig.go @@ -18,12 +18,15 @@ package kubeconfig import ( "fmt" + "net" "os" + "strconv" clientset "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/clientcmd" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" + kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm" "k8s.io/kubernetes/cmd/kubeadm/app/util/errors" ) @@ -121,6 +124,18 @@ func GetClusterFromKubeConfig(config *clientcmdapi.Config) (string, *clientcmdap return "", nil, errors.Errorf("the current context is invalid: %s", config.CurrentContext) } +// PointKubeConfigToLocalAPIEndpoint modifies the provided kubeconfig to point to the given APIEndpoint. 
+func PointKubeConfigToLocalAPIEndpoint(config *clientcmdapi.Config, lae *kubeadmapi.APIEndpoint) { + for _, v := range config.Clusters { + v.Server = fmt.Sprintf("https://%s", + net.JoinHostPort( + lae.AdvertiseAddress, + strconv.Itoa(int(lae.BindPort)), + ), + ) + } +} + // HasAuthenticationCredentials returns true if the current user has valid authentication credentials for // token authentication, basic authentication or X509 authentication func HasAuthenticationCredentials(config *clientcmdapi.Config) bool { diff --git a/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel b/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel index f95e0d622b..5c8de9a5fe 100644 --- a/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel +++ b/deps/github.com/openshift/kubernetes/openshift-hack/images/hyperkube/Dockerfile.rhel @@ -15,4 +15,4 @@ COPY --from=builder /tmp/build/* /usr/bin/ LABEL io.k8s.display-name="OpenShift Kubernetes Server Commands" \ io.k8s.description="OpenShift is a platform for developing, building, and deploying containerized applications." 
\ io.openshift.tags="openshift,hyperkube" \ - io.openshift.build.versions="kubernetes=1.35.4" + io.openshift.build.versions="kubernetes=1.35.5" diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go index 339231bf4c..bef11db1c3 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet.go @@ -653,6 +653,7 @@ func NewMainKubelet(ctx context.Context, nodeStatusMaxImages: nodeStatusMaxImages, tracer: tracer, nodeStartupLatencyTracker: kubeDeps.NodeStartupLatencyTracker, + podStartupLatencyTracker: kubeDeps.PodStartupLatencyTracker, healthChecker: kubeDeps.HealthChecker, flagz: kubeDeps.Flagz, } @@ -815,10 +816,11 @@ func NewMainKubelet(ctx context.Context, kubeCfg.MemorySwap.SwapBehavior, kubeDeps.ContainerManager.GetNodeAllocatableAbsolute, *kubeCfg.MemoryThrottlingFactor, - kubeDeps.PodStartupLatencyTracker, + klet.podStartupLatencyTracker, kubeDeps.TracerProvider, tokenManager, getServiceAccount, + klet.podStartupLatencyTracker, ) if err != nil { return nil, err @@ -1530,6 +1532,9 @@ type Kubelet struct { // Track node startup latencies nodeStartupLatencyTracker util.NodeStartupLatencyTracker + // Track pod startup latencies + podStartupLatencyTracker util.PodStartupLatencyTracker + // Health check kubelet healthChecker watchdog.HealthChecker diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet_test.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet_test.go index 3f69c2432f..c827695581 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/kubelet_test.go @@ -302,8 +302,8 @@ func newTestKubeletWithImageList( kubelet.configMapManager = configMapManager kubelet.mirrorPodClient = fakeMirrorClient kubelet.podManager = kubepod.NewBasicPodManager() - podStartupLatencyTracker := 
kubeletutil.NewPodStartupLatencyTracker() - kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker) + kubelet.podStartupLatencyTracker = kubeletutil.NewPodStartupLatencyTracker() + kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, kubelet.podStartupLatencyTracker) kubelet.nodeStartupLatencyTracker = kubeletutil.NewNodeStartupLatencyTracker() kubelet.podCertificateManager = &podcertificate.NoOpManager{} @@ -3454,10 +3454,11 @@ func TestSyncPodSpans(t *testing.T) { kubeCfg.MemorySwap.SwapBehavior, kubelet.containerManager.GetNodeAllocatableAbsolute, *kubeCfg.MemoryThrottlingFactor, - kubeletutil.NewPodStartupLatencyTracker(), + kubelet.podStartupLatencyTracker, tp, token.NewManager(kubelet.kubeClient), func(string, string) (*v1.ServiceAccount, error) { return nil, nil }, + kubelet.podStartupLatencyTracker, ) assert.NoError(t, err) kubelet.allocationManager.SetContainerRuntime(kubelet.containerRuntime) diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 818ca633a1..efb3034107 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -106,6 +106,11 @@ type podStateProvider interface { ShouldPodRuntimeBeRemoved(kubetypes.UID) bool } +type PodInitContainerTimeRecorder interface { + RecordInitContainerStarted(podUID kubetypes.UID, startedAt time.Time) + RecordInitContainerFinished(podUID kubetypes.UID, finishedAt time.Time) +} + type kubeGenericRuntimeManager struct { runtimeName string recorder record.EventRecorder @@ -191,6 +196,9 @@ type kubeGenericRuntimeManager struct { // Swap controller availability check function (Linux only) // Uses 
sync.OnceValue for lazy initialization getSwapControllerAvailable func() bool + + // Records first initContainer start time and last initContainer finish time + podInitContainerTimeRecorder PodInitContainerTimeRecorder } // KubeGenericRuntime is a interface contains interfaces for container runtime and command. @@ -240,6 +248,7 @@ func NewKubeGenericRuntimeManager( tracerProvider trace.TracerProvider, tokenManager *token.Manager, getServiceAccount plugin.GetServiceAccountFunc, + podInitContainerTimeRecorder PodInitContainerTimeRecorder, ) (KubeGenericRuntime, []images.PostImageGCHook, error) { logger := klog.FromContext(ctx) @@ -247,29 +256,30 @@ func NewKubeGenericRuntimeManager( imageService = newInstrumentedImageManagerService(imageService) tracer := tracerProvider.Tracer(instrumentationScope) kubeRuntimeManager := &kubeGenericRuntimeManager{ - recorder: recorder, - singleProcessOOMKill: singleProcessOOMKill, - cpuCFSQuota: cpuCFSQuota, - cpuCFSQuotaPeriod: cpuCFSQuotaPeriod, - seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"), - livenessManager: livenessManager, - readinessManager: readinessManager, - startupManager: startupManager, - machineInfo: machineInfo, - osInterface: osInterface, - runtimeHelper: runtimeHelper, - runtimeService: runtimeService, - imageService: imageService, - containerManager: containerManager, - internalLifecycle: containerManager.InternalContainerLifecycle(), - logManager: logManager, - runtimeClassManager: runtimeClassManager, - logReduction: logreduction.NewLogReduction(identicalErrorDelay), - seccompDefault: seccompDefault, - memorySwapBehavior: memorySwapBehavior, - getNodeAllocatable: getNodeAllocatable, - memoryThrottlingFactor: memoryThrottlingFactor, - podLogsDirectory: podLogsDirectory, + recorder: recorder, + singleProcessOOMKill: singleProcessOOMKill, + cpuCFSQuota: cpuCFSQuota, + cpuCFSQuotaPeriod: cpuCFSQuotaPeriod, + seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"), + livenessManager: 
livenessManager, + readinessManager: readinessManager, + startupManager: startupManager, + machineInfo: machineInfo, + osInterface: osInterface, + runtimeHelper: runtimeHelper, + runtimeService: runtimeService, + imageService: imageService, + containerManager: containerManager, + internalLifecycle: containerManager.InternalContainerLifecycle(), + logManager: logManager, + runtimeClassManager: runtimeClassManager, + logReduction: logreduction.NewLogReduction(identicalErrorDelay), + seccompDefault: seccompDefault, + memorySwapBehavior: memorySwapBehavior, + getNodeAllocatable: getNodeAllocatable, + memoryThrottlingFactor: memoryThrottlingFactor, + podLogsDirectory: podLogsDirectory, + podInitContainerTimeRecorder: podInitContainerTimeRecorder, } // Initialize swap controller availability check with lazy evaluation @@ -1665,6 +1675,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po } return err } + if typeName == "init container" { + // Don't measure restartable init containers (sidecars) + if !podutil.IsRestartableInitContainer(spec.container) { + if m.podInitContainerTimeRecorder != nil { + m.podInitContainerTimeRecorder.RecordInitContainerStarted(pod.UID, time.Now()) + } + } + } return nil } @@ -1694,6 +1712,21 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po logger.V(4).Info("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod)) } + for _, cs := range podStatus.ContainerStatuses { + // Check if this is an init container + for _, init := range pod.Spec.InitContainers { + // Don't measure restartable init containers (sidecars) + if podutil.IsRestartableInitContainer(&init) { + continue + } + if cs.Name == init.Name && cs.State == kubecontainer.ContainerStateExited && !cs.FinishedAt.IsZero() { + if m.podInitContainerTimeRecorder != nil { + m.podInitContainerTimeRecorder.RecordInitContainerFinished(pod.UID, cs.FinishedAt) + } + } + } + } + // Step 7: For 
containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources if resizable, _, _ := allocation.IsInPlacePodVerticalScalingAllowed(pod); resizable { if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources || podContainerChanges.UpdatePodLevelResources { diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go index d972815a78..d083464f4d 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go @@ -34,6 +34,8 @@ type PodStartupLatencyTracker interface { ObservedPodOnWatch(pod *v1.Pod, when time.Time) RecordImageStartedPulling(podUID types.UID) RecordImageFinishedPulling(podUID types.UID) + RecordInitContainerStarted(podUID types.UID, startedAt time.Time) + RecordInitContainerFinished(podUID types.UID, finishedAt time.Time) RecordStatusUpdated(pod *v1.Pod) DeletePodStartupState(podUID types.UID) } @@ -42,15 +44,26 @@ type basicPodStartupLatencyTracker struct { // protect against concurrent read and write on pods map lock sync.Mutex pods map[types.UID]*perPodState + // Track pods that were excluded from SLI due to unschedulability + // These pods should never be re-added even if they later become schedulable + excludedPods map[types.UID]bool // metrics for the first network pod only firstNetworkPodSeen bool // For testability clock clock.Clock } +type imagePullSession struct { + start time.Time + end time.Time +} type perPodState struct { - firstStartedPulling time.Time - lastFinishedPulling time.Time + // Session-based image pulling tracking for accurate overlap handling + imagePullSessions []imagePullSession + imagePullSessionsStarts []time.Time // Track multiple concurrent pull starts + // Init container tracking + 
totalInitContainerRuntime time.Duration + currentInitContainerStart time.Time // first time, when pod status changed into Running observedRunningTime time.Time // log, if pod latency was already Observed @@ -60,8 +73,9 @@ type perPodState struct { // NewPodStartupLatencyTracker creates an instance of PodStartupLatencyTracker func NewPodStartupLatencyTracker() PodStartupLatencyTracker { return &basicPodStartupLatencyTracker{ - pods: map[types.UID]*perPodState{}, - clock: clock.RealClock{}, + pods: map[types.UID]*perPodState{}, + excludedPods: map[types.UID]bool{}, + clock: clock.RealClock{}, } } @@ -77,13 +91,29 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim state := p.pods[pod.UID] if state == nil { - // create a new record for pod, only if it was not yet acknowledged by the Kubelet - // this is required, as we want to log metric only for those pods, that where scheduled - // after Kubelet started + // if pod was previously unschedulable, don't track it again + if p.excludedPods[pod.UID] { + return + } + + // create a new record for pod if pod.Status.StartTime.IsZero() { - p.pods[pod.UID] = &perPodState{} + if isPodUnschedulable(pod) { + p.excludedPods[pod.UID] = true + return + } + + // if pod is schedulable then track it + state = &perPodState{} + p.pods[pod.UID] = state } + return + } + // remove existing pods from tracking (this handles cases where scheduling state becomes known later) + if isPodUnschedulable(pod) { + delete(p.pods, pod.UID) + p.excludedPods[pod.UID] = true return } @@ -102,29 +132,74 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim ctx := context.TODO() logger := klog.FromContext(ctx) podStartingDuration := when.Sub(pod.CreationTimestamp.Time) - imagePullingDuration := state.lastFinishedPulling.Sub(state.firstStartedPulling) - podStartSLOduration := (podStartingDuration - imagePullingDuration).Seconds() + podStartSLOduration := podStartingDuration + + 
totalImagesPullingTime := calculateImagePullingTime(state.imagePullSessions) + if totalImagesPullingTime > 0 { + podStartSLOduration -= totalImagesPullingTime + } + + if state.totalInitContainerRuntime > 0 { + podStartSLOduration -= state.totalInitContainerRuntime + } + + podIsStateful := isStatefulPod(pod) logger.Info("Observed pod startup duration", "pod", klog.KObj(pod), - "podStartSLOduration", podStartSLOduration, + "podStartSLOduration", podStartSLOduration.Seconds(), "podStartE2EDuration", podStartingDuration, + "totalImagesPullingTime", totalImagesPullingTime, + "totalInitContainerRuntime", state.totalInitContainerRuntime, + "isStatefulPod", podIsStateful, "podCreationTimestamp", pod.CreationTimestamp.Time, - "firstStartedPulling", state.firstStartedPulling, - "lastFinishedPulling", state.lastFinishedPulling, + "imagePullSessionsCount", len(state.imagePullSessions), + "imagePullSessionsStartsCount", len(state.imagePullSessionsStarts), "observedRunningTime", state.observedRunningTime, "watchObservedRunningTime", when) - metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration) metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds()) + if !podIsStateful { + metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration.Seconds()) + // if is the first Pod with network track the start values + // these metrics will help to identify problems with the CNI plugin + if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen { + metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration.Seconds()) + p.firstNetworkPodSeen = true + } + } state.metricRecorded = true - // if is the first Pod with network track the start values - // these metrics will help to identify problems with the CNI plugin - if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen { - metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration) - p.firstNetworkPodSeen = true + } +} + +// calculateImagePullingTime computes the total time spent 
pulling images, +// accounting for overlapping pull sessions properly +func calculateImagePullingTime(sessions []imagePullSession) time.Duration { + if len(sessions) == 0 { + return 0 + } + + var totalTime time.Duration + var currentEnd time.Time + + for i, session := range sessions { + if session.end.IsZero() { + continue } + + if i == 0 || session.start.After(currentEnd) { + // First session or no overlap with previous session + totalTime += session.end.Sub(session.start) + currentEnd = session.end + } else if session.end.After(currentEnd) { + // Partial overlap - add only the non-overlapping part + totalTime += session.end.Sub(currentEnd) + currentEnd = session.end + } + // If session.end <= currentEnd, it's completely overlapped } + + return totalTime } func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.UID) { @@ -136,9 +211,8 @@ func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.U return } - if state.firstStartedPulling.IsZero() { - state.firstStartedPulling = p.clock.Now() - } + now := p.clock.Now() + state.imagePullSessionsStarts = append(state.imagePullSessionsStarts, now) } func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.UID) { @@ -150,8 +224,48 @@ func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types. return } - if !state.firstStartedPulling.IsZero() { - state.lastFinishedPulling = p.clock.Now() // Now is always grater than values from the past. 
+ now := p.clock.Now() + + // Complete the oldest pull session if we have active starts + if len(state.imagePullSessionsStarts) > 0 { + // Take the first (oldest) start and create a session + startTime := state.imagePullSessionsStarts[0] + session := imagePullSession{ + start: startTime, + end: now, + } + state.imagePullSessions = append(state.imagePullSessions, session) + state.imagePullSessionsStarts = state.imagePullSessionsStarts[1:] + } +} + +func (p *basicPodStartupLatencyTracker) RecordInitContainerStarted(podUID types.UID, startedAt time.Time) { + p.lock.Lock() + defer p.lock.Unlock() + + state := p.pods[podUID] + if state == nil { + return + } + + state.currentInitContainerStart = startedAt +} + +func (p *basicPodStartupLatencyTracker) RecordInitContainerFinished(podUID types.UID, finishedAt time.Time) { + p.lock.Lock() + defer p.lock.Unlock() + + state := p.pods[podUID] + if state == nil { + return + } + + if !state.currentInitContainerStart.IsZero() { + initDuration := finishedAt.Sub(state.currentInitContainerStart) + if initDuration > 0 { + state.totalInitContainerRuntime += initDuration + } + state.currentInitContainerStart = time.Time{} } } @@ -197,9 +311,43 @@ func hasPodStartedSLO(pod *v1.Pod) bool { return true } +// isStatefulPod determines if a pod is stateful according to the SLI documentation: +// "A stateful pod is defined as a pod that mounts at least one volume with sources +// other than secrets, config maps, downward API and empty dir." +// We also include Projected volumes since they are a collection of ephemeral types. 
+// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md +func isStatefulPod(pod *v1.Pod) bool { + for _, volume := range pod.Spec.Volumes { + // Check if this volume is NOT a stateless/ephemeral type + if volume.Secret == nil && + volume.ConfigMap == nil && + volume.DownwardAPI == nil && + volume.EmptyDir == nil && + volume.Projected == nil { + return true + } + } + return false +} + +// isPodUnschedulable determines if a pod should be excluded from SLI tracking +// according to the SLI definition: "By schedulable pod we mean a pod that has to be +// immediately (without actions from any other components) schedulable in the cluster +// without causing any preemption." +// Any pod with PodScheduled=False is not immediately schedulable and should be excluded. +func isPodUnschedulable(pod *v1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodScheduled && condition.Status == v1.ConditionFalse { + return true + } + } + return false +} + func (p *basicPodStartupLatencyTracker) DeletePodStartupState(podUID types.UID) { p.lock.Lock() defer p.lock.Unlock() delete(p.pods, podUID) + delete(p.excludedPods, podUID) } diff --git a/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker_test.go b/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker_test.go index 5679f0568d..578d849881 100644 --- a/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker_test.go @@ -47,7 +47,8 @@ func TestNoEvents(t *testing.T) { metrics.Register() tracker := &basicPodStartupLatencyTracker{ - pods: map[types.UID]*perPodState{}, + pods: map[types.UID]*perPodState{}, + excludedPods: map[types.UID]bool{}, } if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil { @@ -68,7 +69,8 @@ func 
TestPodsRunningBeforeKubeletStarted(t *testing.T) { metrics.Register() tracker := &basicPodStartupLatencyTracker{ - pods: map[types.UID]*perPodState{}, + pods: map[types.UID]*perPodState{}, + excludedPods: map[types.UID]bool{}, } if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil { @@ -129,7 +131,7 @@ kubelet_pod_start_sli_duration_seconds_count 1 metrics.Register() tracker := &basicPodStartupLatencyTracker{ - pods: map[types.UID]*perPodState{}, + pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{}, clock: fakeClock, } @@ -145,9 +147,9 @@ kubelet_pod_start_sli_duration_seconds_count 1 if !ok { t.Errorf("expected to track pod: %s, but pod not found", podInit.UID) } - if !podState.lastFinishedPulling.Equal(podState.firstStartedPulling.Add(time.Millisecond * 100)) { - t.Errorf("expected pod firstStartedPulling: %s and lastFinishedPulling: %s but got firstStartedPulling: %s and lastFinishedPulling: %s", - podState.firstStartedPulling, podState.firstStartedPulling.Add(time.Millisecond*100), podState.firstStartedPulling, podState.lastFinishedPulling) + + if len(podState.imagePullSessions) != 1 { + t.Errorf("expected one image pull session to be recorded") } podStarted := buildRunningPod() @@ -170,7 +172,7 @@ kubelet_pod_start_sli_duration_seconds_count 1 func TestSinglePodMultipleDownloadsAndRestartsRecorded(t *testing.T) { - t.Run("single pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) { + t.Run("single pod; started in 30s, overlapping image pulling between 10th and 20th seconds", func(t *testing.T) { wants := ` # HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch @@ -210,27 +212,25 @@ kubelet_pod_start_sli_duration_seconds_count 1 metrics.Register() tracker 
:= &basicPodStartupLatencyTracker{ - pods: map[types.UID]*perPodState{}, + pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{}, clock: fakeClock, } podInitializing := buildInitializingPod() tracker.ObservedPodOnWatch(podInitializing, frozenTime) - - // image pulling started at 10s and the last one finished at 30s - // first image starts pulling at 10s + // Image 1: 10-16s fakeClock.SetTime(frozenTime.Add(time.Second * 10)) tracker.RecordImageStartedPulling(podInitializing.UID) - // second image starts pulling at 11s + // Image 2: 11-18s fakeClock.SetTime(frozenTime.Add(time.Second * 11)) tracker.RecordImageStartedPulling(podInitializing.UID) - // third image starts pulling at 14s + // Image 3: 14-20s fakeClock.SetTime(frozenTime.Add(time.Second * 14)) tracker.RecordImageStartedPulling(podInitializing.UID) - // first image finished pulling at 18s + fakeClock.SetTime(frozenTime.Add(time.Second * 16)) + tracker.RecordImageFinishedPulling(podInitializing.UID) fakeClock.SetTime(frozenTime.Add(time.Second * 18)) tracker.RecordImageFinishedPulling(podInitializing.UID) - // second and third finished pulling at 20s fakeClock.SetTime(frozenTime.Add(time.Second * 20)) tracker.RecordImageFinishedPulling(podInitializing.UID) @@ -238,13 +238,13 @@ kubelet_pod_start_sli_duration_seconds_count 1 if !ok { t.Errorf("expected to track pod: %s, but pod not found", podInitializing.UID) } - if !podState.firstStartedPulling.Equal(frozenTime.Add(time.Second * 10)) { // second and third image start pulling should not affect pod firstStartedPulling - t.Errorf("expected pod firstStartedPulling: %s but got firstStartedPulling: %s", - podState.firstStartedPulling.Add(time.Second*10), podState.firstStartedPulling) + if len(podState.imagePullSessions) != 3 { + t.Errorf("expected 3 image pull sessions but got %d", len(podState.imagePullSessions)) } - if !podState.lastFinishedPulling.Equal(frozenTime.Add(time.Second * 20)) { // should be updated when the pod's last image 
finished pulling - t.Errorf("expected pod lastFinishedPulling: %s but got lastFinishedPulling: %s", - podState.lastFinishedPulling.Add(time.Second*20), podState.lastFinishedPulling) + totalTime := calculateImagePullingTime(podState.imagePullSessions) + expectedTime := time.Second * 10 + if totalTime != expectedTime { + t.Errorf("expected total pulling time: %v but got %v", expectedTime, totalTime) } // pod started @@ -275,6 +275,456 @@ kubelet_pod_start_sli_duration_seconds_count 1 }) } +func TestPodWithInitContainersAndMainContainers(t *testing.T) { + + t.Run("single pod with multiple init containers and main containers", func(t *testing.T) { + + wants := ` +# HELP kubelet_pod_start_sli_duration_seconds [ALPHA] Duration in seconds to start a pod, excluding time to pull images and run init containers, measured from pod creation timestamp to when all its containers are reported as started and observed via watch +# TYPE kubelet_pod_start_sli_duration_seconds histogram +kubelet_pod_start_sli_duration_seconds_bucket{le="0.5"} 0 +kubelet_pod_start_sli_duration_seconds_bucket{le="1"} 0 +kubelet_pod_start_sli_duration_seconds_bucket{le="2"} 0 +kubelet_pod_start_sli_duration_seconds_bucket{le="3"} 0 +kubelet_pod_start_sli_duration_seconds_bucket{le="4"} 0 +kubelet_pod_start_sli_duration_seconds_bucket{le="5"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="6"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="8"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="10"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="20"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="30"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="45"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="60"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="120"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="180"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="240"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="300"} 1 
+kubelet_pod_start_sli_duration_seconds_bucket{le="360"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="480"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="600"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="900"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="1200"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="1800"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="2700"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="3600"} 1 +kubelet_pod_start_sli_duration_seconds_bucket{le="+Inf"} 1 +kubelet_pod_start_sli_duration_seconds_sum 4.2 +kubelet_pod_start_sli_duration_seconds_count 1 + ` + + fakeClock := testingclock.NewFakeClock(frozenTime) + + metrics.Register() + + tracker := &basicPodStartupLatencyTracker{ + pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{}, + clock: fakeClock, + } + + podInit := buildInitializingPod("init-1", "init-2") + tracker.ObservedPodOnWatch(podInit, frozenTime) + + // Init container 1 image pull: 0.5s-1.5s + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 500)) + tracker.RecordImageStartedPulling(podInit.UID) + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 1500)) + tracker.RecordImageFinishedPulling(podInit.UID) + // Init container 1 runtime: 2s-4s + fakeClock.SetTime(frozenTime.Add(time.Second * 2)) + tracker.RecordInitContainerStarted(types.UID(uid), fakeClock.Now()) + fakeClock.SetTime(frozenTime.Add(time.Second * 4)) + tracker.RecordInitContainerFinished(types.UID(uid), fakeClock.Now()) + + // Init container 2 image pull: 4.2s-5.2s + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 4200)) + tracker.RecordImageStartedPulling(podInit.UID) + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 5200)) + tracker.RecordImageFinishedPulling(podInit.UID) + // Init container 2 runtime: 5.5s-7s + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 5500)) + tracker.RecordInitContainerStarted(types.UID(uid), fakeClock.Now()) + fakeClock.SetTime(frozenTime.Add(time.Second 
* 7)) + tracker.RecordInitContainerFinished(types.UID(uid), fakeClock.Now()) + + // Main container 1 image pull: 7.2s-8.2s + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 7200)) + tracker.RecordImageStartedPulling(podInit.UID) + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 8200)) + tracker.RecordImageFinishedPulling(podInit.UID) + // Main container 2 image pull: 7.3s-8.5s + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 7300)) + tracker.RecordImageStartedPulling(podInit.UID) + fakeClock.SetTime(frozenTime.Add(time.Millisecond * 8500)) + tracker.RecordImageFinishedPulling(podInit.UID) + + // Pod becomes running at 11s + podStarted := buildRunningPod() + tracker.RecordStatusUpdated(podStarted) + tracker.ObservedPodOnWatch(podStarted, frozenTime.Add(time.Second*11)) + + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil { + t.Fatal(err) + } + + state := tracker.pods[types.UID(uid)] + assert.NotNil(t, state, "Pod state should exist") + + expectedInitRuntime := 2*time.Second + 1500*time.Millisecond + assert.Equal(t, expectedInitRuntime, state.totalInitContainerRuntime, + "Total init container runtime should be 3.5s (2s + 1.5s)") + + totalImageTime := calculateImagePullingTime(state.imagePullSessions) + expectedImageTime := 3300 * time.Millisecond // 1s + 1s + 1.3s concurrent overlap + assert.Equal(t, expectedImageTime, totalImageTime, + "Image pulling time should be 3.3s: init-1(1s) + init-2(1s) + concurrent-main(1.3s)") + + assert.Len(t, state.imagePullSessions, 4, "Should have 4 image pull sessions") + + // cleanup + tracker.DeletePodStartupState(podStarted.UID) + + assert.Empty(t, tracker.pods) + metrics.PodStartSLIDuration.Reset() + }) +} + +func TestImmediatelySchedulablePods(t *testing.T) { + t.Run("pods not immediately schedulable should be excluded from tracking", func(t *testing.T) { + wants := "" + + fakeClock := testingclock.NewFakeClock(frozenTime) + metrics.Register() + + tracker := 
&basicPodStartupLatencyTracker{ + pods: map[types.UID]*perPodState{}, + excludedPods: map[types.UID]bool{}, + clock: fakeClock, + } + + // pod that is not immediately schedulable (PodScheduled=False) + podNotSchedulable := buildInitializingPod() + podNotSchedulable.UID = "not-schedulable-pod" + podNotSchedulable.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionFalse, + Reason: "Unschedulable", + }, + } + tracker.ObservedPodOnWatch(podNotSchedulable, frozenTime) + assert.Empty(t, tracker.pods, "Pod with PodScheduled=False should not be tracked") + + // pod that is immediately schedulable (PodScheduled=True) + podSchedulable := buildInitializingPod() + podSchedulable.UID = "schedulable-pod" + podSchedulable.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + Reason: "Scheduled", + }, + } + tracker.ObservedPodOnWatch(podSchedulable, frozenTime) + assert.Len(t, tracker.pods, 1, "Pod with PodScheduled=True should be tracked") + + // pod without PodScheduled condition + podNoCondition := buildInitializingPod() + podNoCondition.UID = "no-condition-pod" + tracker.ObservedPodOnWatch(podNoCondition, frozenTime) + assert.Len(t, tracker.pods, 2, "Pod without PodScheduled condition should be tracked by default") + + // Verify metrics are empty + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil { + t.Fatal(err) + } + + // cleanup + tracker.DeletePodStartupState(podSchedulable.UID) + tracker.DeletePodStartupState(podNoCondition.UID) + assert.Empty(t, tracker.pods) + metrics.PodStartSLIDuration.Reset() + }) + + t.Run("pod observed as schedulable first, then becomes unschedulable should not be tracked", func(t *testing.T) { + wants := "" + + fakeClock := testingclock.NewFakeClock(frozenTime) + metrics.Register() + + tracker := &basicPodStartupLatencyTracker{ + pods: map[types.UID]*perPodState{}, excludedPods: 
map[types.UID]bool{}, + clock: fakeClock, + } + + // First observe pod as schedulable + podSchedulable := buildInitializingPod() + podSchedulable.UID = "becomes-unschedulable" + tracker.ObservedPodOnWatch(podSchedulable, frozenTime) + assert.Len(t, tracker.pods, 1, "Pod should be tracked initially") + + // Later observe the same pod as unschedulable + podUnschedulable := buildInitializingPod() + podUnschedulable.UID = "becomes-unschedulable" + podUnschedulable.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionFalse, + Reason: "Unschedulable", + }, + } + + tracker.ObservedPodOnWatch(podUnschedulable, frozenTime.Add(time.Second)) + assert.Empty(t, tracker.pods, "Pod should be removed when it becomes unschedulable") + + // Verify no metrics + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil { + t.Fatal(err) + } + + metrics.PodStartSLIDuration.Reset() + }) + + t.Run("pod observed as unschedulable first, then becomes schedulable should remain not tracked", func(t *testing.T) { + wants := "" + + fakeClock := testingclock.NewFakeClock(frozenTime) + metrics.Register() + + tracker := &basicPodStartupLatencyTracker{ + pods: map[types.UID]*perPodState{}, + excludedPods: map[types.UID]bool{}, + clock: fakeClock, + } + + // First observe pod as unschedulable + podUnschedulable := buildInitializingPod() + podUnschedulable.UID = "unschedulable-first" + podUnschedulable.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionFalse, + Reason: "Unschedulable", + }, + } + + tracker.ObservedPodOnWatch(podUnschedulable, frozenTime) + assert.Empty(t, tracker.pods, "Pod should not be tracked when first observed as unschedulable") + assert.True(t, tracker.excludedPods[podUnschedulable.UID], "Pod should be in excludedPods map") + + // Later observe the same pod as schedulable (e.g., after cluster autoscaling) + podSchedulable := 
buildInitializingPod() + podSchedulable.UID = "unschedulable-first" + podSchedulable.Status.Conditions = []corev1.PodCondition{ + { + Type: corev1.PodScheduled, + Status: corev1.ConditionTrue, + Reason: "Scheduled", + }, + } + + tracker.ObservedPodOnWatch(podSchedulable, frozenTime.Add(time.Second*5)) + assert.Empty(t, tracker.pods, "Pod should remain excluded even after becoming schedulable") + assert.True(t, tracker.excludedPods[podSchedulable.UID], "Pod should remain in excludedPods map") + + // Complete the startup process - should not record metrics + podRunning := buildRunningPod() + podRunning.UID = "unschedulable-first" + tracker.RecordStatusUpdated(podRunning) + tracker.ObservedPodOnWatch(podRunning, frozenTime.Add(time.Second*10)) + + // Verify no SLI metrics recorded + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wants), metricsName); err != nil { + t.Fatal(err) + } + + // cleanup + tracker.DeletePodStartupState(podRunning.UID) + assert.Empty(t, tracker.pods) + assert.False(t, tracker.excludedPods[podRunning.UID], "Pod should be removed from excludedPods on cleanup") + metrics.PodStartSLIDuration.Reset() + }) +} + +func TestStatefulPodExclusion(t *testing.T) { + t.Run("stateful pods should be excluded from SLI metrics", func(t *testing.T) { + wantsSLI := "" + + fakeClock := testingclock.NewFakeClock(frozenTime) + metrics.Register() + + tracker := &basicPodStartupLatencyTracker{ + pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{}, + clock: fakeClock, + } + + // Create a stateful pod (with PVC volume) + statefulPod := buildInitializingPod() + statefulPod.UID = "stateful-pod" + statefulPod.Spec.Volumes = []corev1.Volume{ + { + Name: "persistent-storage", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: "test-pvc", + }, + }, + }, + } + + tracker.ObservedPodOnWatch(statefulPod, frozenTime) + + statefulPodRunning := buildRunningPod() + 
statefulPodRunning.UID = "stateful-pod" + statefulPodRunning.Spec.Volumes = statefulPod.Spec.Volumes + tracker.RecordStatusUpdated(statefulPodRunning) + + // Observe pod as running after 3 seconds + tracker.ObservedPodOnWatch(statefulPodRunning, frozenTime.Add(time.Second*3)) + + // Verify no SLI metrics for stateful pod ( + if err := testutil.GatherAndCompare(metrics.GetGather(), strings.NewReader(wantsSLI), metricsName); err != nil { + t.Fatal(err) + } + + // cleanup + tracker.DeletePodStartupState(statefulPod.UID) + assert.Empty(t, tracker.pods) + metrics.PodStartSLIDuration.Reset() + }) +} + +func TestIsStatefulPod(t *testing.T) { + tests := []struct { + name string + volumes []corev1.Volume + expected bool + }{ + { + name: "no volumes is stateless", + volumes: nil, + expected: false, + }, + { + name: "projected volume (such as service account token) is stateless", + volumes: []corev1.Volume{ + { + Name: "kube-api-access-abcde", + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + {ServiceAccountToken: &corev1.ServiceAccountTokenProjection{Path: "token"}}, + {ConfigMap: &corev1.ConfigMapProjection{ + LocalObjectReference: corev1.LocalObjectReference{Name: "kube-root-ca.crt"}, + }}, + {DownwardAPI: &corev1.DownwardAPIProjection{}}, + }, + }, + }, + }, + }, + expected: false, + }, + { + name: "projected volume with a secret and configmap is stateless", + volumes: []corev1.Volume{ + { + Name: "kube-api-access-abcde", + VolumeSource: corev1.VolumeSource{Projected: &corev1.ProjectedVolumeSource{}}, + }, + { + Name: "my-secret", + VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: "s"}}, + }, + { + Name: "my-config", + VolumeSource: corev1.VolumeSource{ConfigMap: &corev1.ConfigMapVolumeSource{LocalObjectReference: corev1.LocalObjectReference{Name: "c"}}}, + }, + }, + expected: false, + }, + { + name: "PVC volume is stateful", + volumes: []corev1.Volume{ + { + Name: 
"data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ClaimName: "pvc"}, + }, + }, + }, + expected: true, + }, + { + name: "PVC with a projected volume is stateful", + volumes: []corev1.Volume{ + { + Name: "kube-api-access-abcde", + VolumeSource: corev1.VolumeSource{Projected: &corev1.ProjectedVolumeSource{}}, + }, + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ClaimName: "pvc"}, + }, + }, + }, + expected: true, + }, + { + name: "hostPath volume is stateful", + volumes: []corev1.Volume{ + { + Name: "host-vol", + VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/data"}}, + }, + }, + expected: true, + }, + { + name: "emptyDir is stateless", + volumes: []corev1.Volume{ + { + Name: "scratch", + VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{}}, + }, + }, + expected: false, + }, + { + name: "downwardAPI is stateless", + volumes: []corev1.Volume{ + { + Name: "podinfo", + VolumeSource: corev1.VolumeSource{DownwardAPI: &corev1.DownwardAPIVolumeSource{}}, + }, + }, + expected: false, + }, + { + name: "secret is stateless", + volumes: []corev1.Volume{ + { + Name: "my-secret", + VolumeSource: corev1.VolumeSource{Secret: &corev1.SecretVolumeSource{SecretName: "s"}}, + }, + }, + expected: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + pod := &corev1.Pod{ + Spec: corev1.PodSpec{ + Volumes: tc.volumes, + }, + } + result := isStatefulPod(pod) + assert.Equal(t, tc.expected, result, "isStatefulPod mismatch for %q", tc.name) + }) + } +} + func TestFirstNetworkPodMetrics(t *testing.T) { t.Run("first network pod; started in 30s, image pulling between 10th and 20th seconds", func(t *testing.T) { @@ -290,7 +740,7 @@ kubelet_first_network_pod_start_sli_duration_seconds 30 metrics.Register() tracker := &basicPodStartupLatencyTracker{ - pods: 
map[types.UID]*perPodState{}, + pods: map[types.UID]*perPodState{}, excludedPods: map[types.UID]bool{}, clock: fakeClock, } @@ -338,10 +788,20 @@ kubelet_first_network_pod_start_sli_duration_seconds 30 }) } -func buildInitializingPod() *corev1.Pod { - return buildPodWithStatus([]corev1.ContainerStatus{ +func buildInitializingPod(initContainerNames ...string) *corev1.Pod { + pod := buildPodWithStatus([]corev1.ContainerStatus{ {State: corev1.ContainerState{Waiting: &corev1.ContainerStateWaiting{Reason: "PodInitializing"}}}, }) + + // Add init containers if specified + if len(initContainerNames) > 0 { + pod.Spec.InitContainers = make([]corev1.Container, len(initContainerNames)) + for i, name := range initContainerNames { + pod.Spec.InitContainers[i] = corev1.Container{Name: name} + } + } + + return pod } func buildRunningPod() *corev1.Pod { @@ -361,3 +821,78 @@ func buildPodWithStatus(cs []corev1.ContainerStatus) *corev1.Pod { }, } } + +func TestCalculateImagePullingTime(t *testing.T) { + baseTime := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + + t.Run("empty sessions should return zero", func(t *testing.T) { + sessions := []imagePullSession{} + result := calculateImagePullingTime(sessions) + assert.Equal(t, time.Duration(0), result) + }) + + t.Run("incomplete sessions should be ignored (for example: kubelet restart)", func(t *testing.T) { + sessions := []imagePullSession{ + {start: baseTime, end: baseTime.Add(3 * time.Second)}, // valid: 3s + {start: baseTime.Add(5 * time.Second), end: time.Time{}}, // incomplete, ignored + {start: baseTime.Add(10 * time.Second), end: baseTime.Add(12 * time.Second)}, // valid: 2s + } + result := calculateImagePullingTime(sessions) + assert.Equal(t, 5*time.Second, result) // 3s + 2s + }) + + t.Run("non-overlapping sessions", func(t *testing.T) { + sessions := []imagePullSession{ + {start: baseTime, end: baseTime.Add(2 * time.Second)}, // 2s + {start: baseTime.Add(5 * time.Second), end: baseTime.Add(8 * time.Second)}, // 3s + {start: 
baseTime.Add(10 * time.Second), end: baseTime.Add(15 * time.Second)}, // 5s + } + result := calculateImagePullingTime(sessions) + assert.Equal(t, 10*time.Second, result) // 2s + 3s + 5s + }) + + t.Run("partially overlapping sessions", func(t *testing.T) { + sessions := []imagePullSession{ + {start: baseTime, end: baseTime.Add(5 * time.Second)}, // 0-5s + {start: baseTime.Add(3 * time.Second), end: baseTime.Add(8 * time.Second)}, // 3-8s (overlap 3-5s) + {start: baseTime.Add(7 * time.Second), end: baseTime.Add(12 * time.Second)}, // 7-12s (overlap 7-8s) + } + result := calculateImagePullingTime(sessions) + assert.Equal(t, 12*time.Second, result) // 12s + }) + + t.Run("completely overlapped sessions", func(t *testing.T) { + sessions := []imagePullSession{ + {start: baseTime, end: baseTime.Add(10 * time.Second)}, // 0-10s + {start: baseTime.Add(2 * time.Second), end: baseTime.Add(8 * time.Second)}, // 2-8s (completely inside) + {start: baseTime.Add(3 * time.Second), end: baseTime.Add(5 * time.Second)}, // 3-5s (completely inside) + } + result := calculateImagePullingTime(sessions) + assert.Equal(t, 10*time.Second, result) // Only outer session: 10s + }) + + t.Run("partially and completely overlapping sessions", func(t *testing.T) { + sessions := []imagePullSession{ + {start: baseTime, end: baseTime.Add(10 * time.Second)}, // 0-10s + {start: baseTime.Add(2 * time.Second), end: baseTime.Add(5 * time.Second)}, // 2-5s (completely inside first) + {start: baseTime.Add(3 * time.Second), end: baseTime.Add(7 * time.Second)}, // 3-7s (completely inside first and partially overlapping second) + } + result := calculateImagePullingTime(sessions) + assert.Equal(t, 10*time.Second, result) // 10s + }) + + t.Run("completely overlapped and partially overlapped sessions with gap", func(t *testing.T) { + // Test mixing completely overlapped sessions with separate sessions + sessions := []imagePullSession{ + {start: baseTime, end: baseTime.Add(8 * time.Second)}, // 0-8s + {start: 
baseTime.Add(2 * time.Second), end: baseTime.Add(6 * time.Second)}, // 2-6s (completely inside first) + {start: baseTime.Add(3 * time.Second), end: baseTime.Add(5 * time.Second)}, // 3-5s (completely inside first) + {start: baseTime.Add(15 * time.Second), end: baseTime.Add(20 * time.Second)}, // 15-20s (separate) + {start: baseTime.Add(16 * time.Second), end: baseTime.Add(18 * time.Second)}, // 16-18s (completely inside fourth) + {start: baseTime.Add(17 * time.Second), end: baseTime.Add(21 * time.Second)}, // 17-21s (partially overlapping fourth and fifth) + } + result := calculateImagePullingTime(sessions) + // 8s (first group) + 6s (second group) = 14s + assert.Equal(t, 14*time.Second, result) + }) +} diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier.go b/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier.go index d4f8cf13db..05551a7d2f 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier.go @@ -745,7 +745,9 @@ func (proxier *Proxier) syncProxyRules() (retryError error) { // Keep track of how long syncs take. start := time.Now() - doFullSync := proxier.needFullSync || (time.Since(proxier.lastFullSync) > proxyutil.FullSyncPeriod) + doFullSync := proxier.needFullSync || + // Avoid regular full syncs for large clusters. 
+ ((time.Since(proxier.lastFullSync) > proxyutil.FullSyncPeriod) && !proxier.largeClusterMode) defer func() { metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier_test.go b/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier_test.go index ca8d9a168c..e3b26698b9 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/iptables/proxier_test.go @@ -5640,6 +5640,21 @@ func TestSyncProxyRulesLargeClusterMode(t *testing.T) { t.Errorf("numComments (%d) != 0 after partial resync when numEndpoints (%d) > threshold (%d)", numComments, expectedEndpoints+3, largeClusterEndpointsThreshold) } + // Even if FullSyncPeriod has elapsed, large-cluster mode should keep this as + // a partial resync when there are no explicit changes requiring a full sync. + if !fp.largeClusterMode { + t.Fatalf("expected to be in large cluster mode") + } + expectedLastFullSync := time.Now().Add(-proxyutil.FullSyncPeriod).Add(-time.Second) + fp.lastFullSync = expectedLastFullSync + err := fp.syncProxyRules() + if err != nil { + t.Fatalf("syncProxyRules failed: %v", err) + } + if !fp.lastFullSync.Equal(expectedLastFullSync) { + t.Fatalf("expected periodic sync in large cluster mode to skip full sync: lastFullSync changed from %v to %v", expectedLastFullSync, fp.lastFullSync) + } + // Now force a full resync and confirm that it rewrites the older services with // no comments as well. 
fp.forceSyncProxyRules() diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns.go b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns.go index 6c9aaa8579..392cd79cf9 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns.go @@ -32,7 +32,10 @@ import ( type HostNetworkService interface { getNetworkByName(name string) (*hnsNetworkInfo, error) - getAllEndpointsByNetwork(networkName string) (map[string]*endpointInfo, error) + // Returns a map of endpoints keyed by both endpoint ID and IP address for all endpoints on the specified network, and a map of remote endpoints with duplicate IPs to be deleted. + getAllEndpointsByNetwork(networkName string) (map[string]*endpointInfo, map[string]bool, error) + // deleteAllRemoteEndpointsWithDupIP deletes all remote endpoints with duplicate IPs that were found in getAllEndpointsByNetwork. This is needed to clean up stale remote endpoints that can be left behind due to a Windows bug. 
+ deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP map[string]bool) getEndpointByID(id string) (*endpointInfo, error) getEndpointByIpAddress(ip string, networkName string) (*endpointInfo, error) getEndpointByName(id string) (*endpointInfo, error) @@ -115,17 +118,20 @@ func (hns hns) getNetworkByName(name string) (*hnsNetworkInfo, error) { }, nil } -func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpointInfo), error) { +func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpointInfo), map[string]bool, error) { hcnnetwork, err := hns.hcn.GetNetworkByName(networkName) if err != nil { klog.ErrorS(err, "failed to get HNS network by name", "name", networkName) - return nil, err + return nil, nil, err } endpoints, err := hns.hcn.ListEndpointsOfNetwork(hcnnetwork.Id) if err != nil { - return nil, fmt.Errorf("failed to list endpoints: %w", err) + return nil, nil, fmt.Errorf("failed to list endpoints: %w", err) } + endpointInfos := make(map[string]*(endpointInfo)) + remoteEPsWithDupIP := make(map[string]bool) + for _, ep := range endpoints { if len(ep.IpConfigurations) == 0 { @@ -143,14 +149,22 @@ func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpoi break } - isLocal := uint32(ep.Flags&hcn.EndpointFlagsRemoteEndpoint) == 0 - - if existingEp, ok := endpointInfos[ipConfig.IpAddress]; ok && isLocal { - // If the endpoint is already part of the queried endpoints map and is local, - // then we should not add it again to the map - // This is to avoid overwriting the remote endpoint info with a local endpoint. 
- klog.V(3).InfoS("Endpoint already exists in queried endpoints map; skipping.", "newLocalEndpoint", ep, "ipConfig", ipConfig, "existingEndpoint", existingEp) - continue + curEpIsLocal := uint32(ep.Flags&hcn.EndpointFlagsRemoteEndpoint) == 0 + + if existingEp, ok := endpointInfos[ipConfig.IpAddress]; ok { + if curEpIsLocal && !existingEp.isLocal { + // Local found, stale remote in map → delete remote from HNS, overwrite + remoteEPsWithDupIP[existingEp.hnsID] = true + delete(endpointInfos, existingEp.hnsID) + delete(endpointInfos, existingEp.ip) + // fall through to add local + } else if !curEpIsLocal && existingEp.isLocal { + // Local already in map, remote arriving → delete remote from HNS, skip + remoteEPsWithDupIP[ep.Id] = true + continue + } else { + continue // same type, keep existing + } } // Add to map with key endpoint ID or IP address @@ -158,7 +172,7 @@ func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpoi // TODO: Store by IP only and remove any lookups by endpoint ID. 
epInfo := &endpointInfo{ ip: ipConfig.IpAddress, - isLocal: isLocal, + isLocal: curEpIsLocal, macAddress: ep.MacAddress, hnsID: ep.Id, hns: hns, @@ -173,7 +187,17 @@ func (hns hns) getAllEndpointsByNetwork(networkName string) (map[string]*(endpoi } klog.V(3).InfoS("Queried endpoints from network", "network", networkName, "count", len(endpointInfos)) klog.V(5).InfoS("Queried endpoints details", "network", networkName, "endpointInfos", endpointInfos) - return endpointInfos, nil + return endpointInfos, remoteEPsWithDupIP, nil +} + +func (hns hns) deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP map[string]bool) { + for hnsID := range remoteEPsWithDupIP { + klog.V(3).InfoS("Deleting stale remote endpoint with duplicate IP", "hnsID", hnsID) + err := hns.deleteEndpoint(hnsID) + if err != nil { + klog.ErrorS(err, "Failed to delete stale remote endpoint with duplicate IP", "hnsID", hnsID) + } + } } func (hns hns) getEndpointByID(id string) (*endpointInfo, error) { diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns_test.go b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns_test.go index 604eec7444..ec504ab906 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/hns_test.go @@ -95,7 +95,7 @@ func TestGetAllEndpointsByNetwork(t *testing.T) { t.Error(err) } - mapEndpointsInfo, err := hns.getAllEndpointsByNetwork(Network.Name) + mapEndpointsInfo, _, err := hns.getAllEndpointsByNetwork(Network.Name) if err != nil { t.Error(err) } @@ -157,24 +157,25 @@ func TestGetAllEndpointsByNetworkWithDupEP(t *testing.T) { t.Error(err) } - mapEndpointsInfo, err := hns.getAllEndpointsByNetwork(Network.Name) + mapEndpointsInfo, remoteEPsWithDupIP, err := hns.getAllEndpointsByNetwork(Network.Name) if err != nil { t.Error(err) } + hns.deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP) endpointIpv4, ipv4EpPresent := mapEndpointsInfo[ipv4Config.IpAddress] 
assert.True(t, ipv4EpPresent, "IPV4 endpoint is missing in Dualstack mode") assert.Equal(t, endpointIpv4.ip, epIpAddress, "IPV4 IP is missing in Dualstack mode") - assert.Equal(t, endpointIpv4.hnsID, remoteEndpoint.Id, "HNS ID is not matching with remote endpoint") + assert.Equal(t, endpointIpv4.hnsID, dupLocalEndpoint.Id, "HNS ID is not matching with local endpoint") endpointIpv6, ipv6EpPresent := mapEndpointsInfo[ipv6Config.IpAddress] assert.True(t, ipv6EpPresent, "IPV6 endpoint is missing in Dualstack mode") assert.Equal(t, endpointIpv6.ip, epIpv6Address, "IPV6 IP is missing in Dualstack mode") - assert.Equal(t, endpointIpv6.hnsID, remoteEndpoint.Id, "HNS ID is not matching with remote endpoint") + assert.Equal(t, endpointIpv6.hnsID, dupLocalEndpoint.Id, "HNS ID is not matching with local endpoint") - err = hns.hcn.DeleteEndpoint(remoteEndpoint) - if err != nil { - t.Error(err) - } + remoteEpExists, _ := hns.hcn.GetEndpointByID(remoteEndpoint.Id) + assert.Nil(t, remoteEpExists, "Remote endpoint with duplicate IP should have been deleted") + + // Clean up the duplicate local endpoint err = hns.hcn.DeleteEndpoint(dupLocalEndpoint) if err != nil { t.Error(err) diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier.go b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier.go index 46ed064b62..4cb12eda38 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier.go @@ -184,7 +184,8 @@ type remoteSubnetInfo struct { } const ( - NETWORK_TYPE_OVERLAY = "overlay" + NETWORK_TYPE_OVERLAY = "overlay" + NETWORK_TYPE_L2BRIDGE = "L2Bridge" // MAX_COUNT_STALE_LOADBALANCERS is the maximum number of stale loadbalancers which cleanedup in single syncproxyrules. // If there are more stale loadbalancers to clean, it will go to next iteration of syncproxyrules. 
MAX_COUNT_STALE_LOADBALANCERS = 20 @@ -1243,7 +1244,9 @@ func (proxier *Proxier) syncProxyRules() (retryError error) { _ = proxier.endpointsMap.Update(proxier.endpointsChanges) // Query HNS for endpoints and load balancers - queriedEndpoints, err := hns.getAllEndpointsByNetwork(hnsNetworkName) + queriedEndpoints, remoteEPsWithDupIP, err := hns.getAllEndpointsByNetwork(hnsNetworkName) + defer hns.deleteAllRemoteEndpointsWithDupIP(remoteEPsWithDupIP) + if err != nil { klog.ErrorS(err, "Querying HNS for endpoints failed") return @@ -1788,23 +1791,30 @@ func (proxier *Proxier) syncProxyRules() (retryError error) { } // remove stale endpoint refcount entries + proxier.deleteTerminatedEndpoints(queriedEndpoints) + + // This will cleanup stale load balancers which are pending delete + // in last iteration + proxier.cleanupStaleLoadbalancers() + return +} + +func (proxier *Proxier) deleteTerminatedEndpoints(queriedEndpoints map[string]*(endpointInfo)) { for epIP := range proxier.terminatedEndpoints { klog.V(5).InfoS("Terminated endpoints ready for deletion", "epIP", epIP) if epToDelete := queriedEndpoints[epIP]; epToDelete != nil && epToDelete.hnsID != "" && !epToDelete.IsLocal() { - if refCount := proxier.endPointsRefCount.getRefCount(epToDelete.hnsID); refCount == nil || *refCount == 0 { - err := proxier.hns.deleteEndpoint(epToDelete.hnsID) - if err != nil { + refCount := proxier.endPointsRefCount.getRefCount(epToDelete.hnsID) + if refCount == nil || *refCount == 0 { + if err := proxier.hns.deleteEndpoint(epToDelete.hnsID); err != nil { klog.ErrorS(err, "Deleting unreferenced remote endpoint failed", "hnsID", epToDelete.hnsID) } else { klog.V(3).InfoS("Deleting unreferenced remote endpoint succeeded", "hnsID", epToDelete.hnsID, "IP", epToDelete.ip) } + } else { + klog.V(3).InfoS("Not deleting remote endpoint as it is still referenced", "hnsID", epToDelete.hnsID, "IP", epToDelete.ip, "refCount", refCount) } } } - // This will cleanup stale load balancers which are pending 
delete - // in last iteration - proxier.cleanupStaleLoadbalancers() - return } // deleteExistingLoadBalancer checks whether loadbalancer delete is needed or not. diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier_test.go b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier_test.go index d6492a9297..4ebce722b9 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/proxier_test.go @@ -2004,3 +2004,674 @@ type testHostMacProvider struct { func (r *testHostMacProvider) GetHostMac(nodeIP net.IP) string { return r.macAddress } + +// TestRemoteAndLocalEndpointsSameIP demonstrates a reference counting issue +// when two services share an endpoint with the same IP address, where one +// service treats it as local (NodeName matches proxy hostname) and the other +// treats it as remote (NodeName doesn't match). The remote proxy endpoint +// resolves to the local HNS endpoint, causing its refCount to never be +// incremented via the shared endPointsRefCount map. 
+func TestRemoteAndLocalEndpointsSameIP(t *testing.T) { + proxier := NewFakeProxier(t, testNodeName, netutils.ParseIPSloppy("10.0.0.1"), NETWORK_TYPE_L2BRIDGE, false) + if proxier == nil { + t.Error("Failed to create proxier") + } + + sharedEPIP := epIpAddressLocal1 // "192.168.4.4" — same IP for both services + + svcIP1 := "10.20.30.41" + svcPort1 := 80 + svcPortName1 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + svcIP2 := "10.20.30.42" + svcPort2 := 80 + svcPortName2 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc2"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + makeServiceMap(proxier, + // svc1 uses the endpoint as LOCAL + makeTestService(svcPortName1.Namespace, svcPortName1.Name, func(svc *v1.Service) { + svc.Spec.Type = v1.ServiceTypeClusterIP + svc.Spec.ClusterIP = svcIP1 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName1.Port, + Port: int32(svcPort1), + Protocol: v1.ProtocolTCP, + }} + }), + // svc2 uses the endpoint as REMOTE + makeTestService(svcPortName2.Namespace, svcPortName2.Name, func(svc *v1.Service) { + svc.Spec.Type = v1.ServiceTypeClusterIP + svc.Spec.ClusterIP = svcIP2 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName2.Port, + Port: int32(svcPort2), + Protocol: v1.ProtocolTCP, + }} + }), + ) + + populateEndpointSlices(proxier, + // svc1's endpoint: local (NodeName = "testhost" matches proxy hostname) + makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{{ + Addresses: []string{sharedEPIP}, + NodeName: ptr.To(testNodeName), + }} + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName1.Port), + Port: ptr.To(int32(svcPort1)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + // svc2's endpoint: remote (NodeName = "testhost2" doesn't match proxy hostname) + 
makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{{ + Addresses: []string{sharedEPIP}, + NodeName: ptr.To("testhost2"), + }} + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName2.Port), + Port: ptr.To(int32(svcPort2)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + ) + + // Pre-populate the local HNS endpoint at sharedEPIP (as CNI would create it) + hcnMock := (proxier.hcn).(*fakehcn.HcnMock) + hcnMock.PopulateQueriedEndpoints(endpointLocal1, networkId, sharedEPIP, macAddressLocal1, prefixLen) + + proxier.setInitialized(true) + proxier.syncProxyRules() + + // Find each service's endpoint + var localEp, remoteEp *endpointInfo + for _, ep := range proxier.endpointsMap[svcPortName1] { + if epI, ok := ep.(*endpointInfo); ok && epI.ip == sharedEPIP { + localEp = epI + } + } + for _, ep := range proxier.endpointsMap[svcPortName2] { + if epI, ok := ep.(*endpointInfo); ok && epI.ip == sharedEPIP { + remoteEp = epI + } + } + + assert.NotNil(t, localEp, "Expected to find local endpoint for svc1") + assert.NotNil(t, remoteEp, "Expected to find remote endpoint for svc2") + + // Both should resolve to the same local HNS endpoint + assert.Equal(t, endpointLocal1, localEp.hnsID, + "Local ep should have the pre-populated HNS endpoint ID") + assert.Equal(t, endpointLocal1, remoteEp.hnsID, + "Remote ep should resolve to the same local HNS endpoint ID") + + // Verify the endpoint locality as seen by the proxy layer + assert.True(t, localEp.IsLocal(), "svc1's endpoint should be local") + assert.False(t, remoteEp.IsLocal(), "svc2's endpoint should be remote") + + // The remote ep's refCount was never incremented via endPointsRefCount + // because the resolved HNS endpoint is local (newHnsEndpoint.IsLocal()=true), + // so the code took the hnsLocalEndpoints branch instead of incrementing the + // shared refCount. 
The remote ep retains its private refCount (value 0). + assert.NotNil(t, remoteEp.refCount, "Remote ep refCount pointer should not be nil") + assert.Equal(t, uint16(0), *remoteEp.refCount, + "Remote ep refCount should be 0 — it was never incremented because "+ + "the HNS endpoint is local, exposing a refCount tracking gap") + + // The shared endPointsRefCount map should not have an entry for this + // HNS endpoint (or if it does from some other path, it should be 0), + // because refCounts are only tracked for remote HNS endpoints. + if refCountPtr, exists := proxier.endPointsRefCount[endpointLocal1]; exists { + assert.Equal(t, uint16(0), *refCountPtr, + "Shared refCount for the local HNS endpoint should be 0") + } + // If the entry doesn't exist at all, that also confirms the issue: + // the remote proxy endpoint has no shared refCount tracking. +} + +// TestRemoteEndpointDeleteDoesNotAffectLocalEndpointWithSameIP verifies that +// when a remote endpoint is deleted (due to endpoint map changes), a local +// endpoint with the same IP address is not incorrectly deleted. +// +// Flow: +// 1. Create a local endpoint (IP-A) and a remote endpoint (IP-B). +// 2. Two services (svc1, svc2) both use IP-A and IP-B. Remote endpoint refCount = 2. +// 3. Add a third service (svc3) with a local endpoint at IP-B (same IP as the remote). +// Remote refCount stays 2; local endpoint refCount is 0 (locals are not ref-counted). +// 4. Remove endpoints for svc1 and svc2. The remote endpoint at IP-B should be +// cleaned up (refCount drops to 0), but the local endpoint at IP-B for svc3 +// must survive. 
+func TestRemoteEndpointDeleteDoesNotAffectLocalEndpointWithSameIP(t *testing.T) { + proxier := NewFakeProxier(t, testNodeName, netutils.ParseIPSloppy("10.0.0.1"), NETWORK_TYPE_L2BRIDGE, false) + if proxier == nil { + t.Fatal("Failed to create proxier") + } + + localIP := epIpAddressLocal1 // "192.168.4.4" + remoteIP := epIpAddressRemote // "192.168.2.3" + + svcIP1 := "10.20.30.41" + svcPort1 := 80 + svcNodePort1 := 3001 + svcPortName1 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + svcIP2 := "10.20.30.42" + svcPort2 := 80 + svcNodePort2 := 3002 + svcPortName2 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc2"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + svcIP3 := "10.20.30.43" + svcPort3 := 80 + svcPortName3 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc3"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + // Step 1+2: Create two services (svc1, svc2) each with a local endpoint (IP-A) + // and a remote endpoint (IP-B). 
+ makeServiceMap(proxier, + makeTestService(svcPortName1.Namespace, svcPortName1.Name, func(svc *v1.Service) { + svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP1 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName1.Port, + Port: int32(svcPort1), + Protocol: v1.ProtocolTCP, + NodePort: int32(svcNodePort1), + }} + }), + makeTestService(svcPortName2.Namespace, svcPortName2.Name, func(svc *v1.Service) { + svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP2 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName2.Port, + Port: int32(svcPort2), + Protocol: v1.ProtocolTCP, + NodePort: int32(svcNodePort2), + }} + }), + ) + + populateEndpointSlices(proxier, + makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + { + Addresses: []string{localIP}, + NodeName: ptr.To(testNodeName), + }, + { + Addresses: []string{remoteIP}, + }, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName1.Port), + Port: ptr.To(int32(svcPort1)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + { + Addresses: []string{localIP}, + NodeName: ptr.To(testNodeName), + }, + { + Addresses: []string{remoteIP}, + }, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName2.Port), + Port: ptr.To(int32(svcPort2)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + ) + + // Pre-populate the local HNS endpoint (as CNI would create it). + hcnMock := (proxier.hcn).(*fakehcn.HcnMock) + hcnMock.PopulateQueriedEndpoints(endpointLocal1, networkId, localIP, macAddressLocal1, prefixLen) + + proxier.setInitialized(true) + proxier.syncProxyRules() + + // Find the remote endpoint for svc1 and assert refCount == 2. 
+ var remoteEpInfo *endpointInfo + var remoteEpHnsID string + for _, ep := range proxier.endpointsMap[svcPortName1] { + epI, ok := ep.(*endpointInfo) + if ok && epI.ip == remoteIP { + remoteEpInfo = epI + remoteEpHnsID = epI.hnsID + } + } + assert.NotNil(t, remoteEpInfo, "Expected to find remote endpoint for svc1") + assert.NotEmpty(t, remoteEpHnsID, "Remote endpoint should have an HNS ID") + assert.Equal(t, uint16(2), *remoteEpInfo.refCount, + "Remote endpoint refCount should be 2 after two services reference it") + assert.Equal(t, *proxier.endPointsRefCount[remoteEpHnsID], *remoteEpInfo.refCount, + "Global and endpoint refCounts should match") + + // Step 3: Add svc3 with a local endpoint at IP-B (same IP as the remote endpoint). + proxier.setInitialized(false) + + proxier.OnServiceAdd( + makeTestService(svcPortName3.Namespace, svcPortName3.Name, func(svc *v1.Service) { + svc.Spec.Type = v1.ServiceTypeClusterIP + svc.Spec.ClusterIP = svcIP3 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName3.Port, + Port: int32(svcPort3), + Protocol: v1.ProtocolTCP, + }} + })) + proxier.mu.Lock() + proxier.servicesSynced = true + proxier.mu.Unlock() + + proxier.OnEndpointSliceAdd( + makeTestEndpointSlice(svcPortName3.Namespace, svcPortName3.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{{ + Addresses: []string{remoteIP}, + NodeName: ptr.To(testNodeName), // Local because NodeName matches + }} + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName3.Port), + Port: ptr.To(int32(svcPort3)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + })) + proxier.mu.Lock() + proxier.endpointSlicesSynced = true + proxier.mu.Unlock() + + // Pre-populate the new local HNS endpoint at IP-B. 
+ hcnMock.PopulateQueriedEndpoints(endpointLocal2, networkId, remoteIP, macAddressLocal2, prefixLen) + + proxier.setInitialized(true) + proxier.syncProxyRules() + + // The remote endpoint refCount should still be 2 (svc1 and svc2 still reference it). + remoteEpInfo = nil + for _, ep := range proxier.endpointsMap[svcPortName1] { + epI, ok := ep.(*endpointInfo) + if ok && epI.ip == remoteIP { + remoteEpInfo = epI + } + } + assert.NotNil(t, remoteEpInfo, "Expected remote endpoint still present for svc1") + assert.Equal(t, uint16(2), *remoteEpInfo.refCount, + "Remote endpoint refCount should still be 2 after adding local endpoint with same IP") + + // Find svc3's local endpoint at IP-B. Its refCount should be 0 + // (local endpoints are not ref-counted). + var localEpAtRemoteIP *endpointInfo + for _, ep := range proxier.endpointsMap[svcPortName3] { + epI, ok := ep.(*endpointInfo) + if ok && epI.ip == remoteIP { + localEpAtRemoteIP = epI + } + } + assert.NotNil(t, localEpAtRemoteIP, "Expected to find local endpoint for svc3 at IP-B") + assert.True(t, localEpAtRemoteIP.IsLocal(), "svc3's endpoint should be local") + assert.NotNil(t, localEpAtRemoteIP.refCount, "Local endpoint refCount pointer should not be nil") + assert.Equal(t, uint16(0), *localEpAtRemoteIP.refCount, + "Local endpoint refCount should be 0 (locals are not ref-counted)") + + // Step 4: Remove endpoint slices for svc1 and svc2 (IPs A and B disappear). + // This should cause the remote endpoint at IP-B to be cleaned up, + // but the local endpoint at IP-B (used by svc3) must NOT be deleted. 
+ proxier.setInitialized(false) + + deleteEndpointSlices(proxier, + makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + { + Addresses: []string{localIP}, + NodeName: ptr.To(testNodeName), + }, + { + Addresses: []string{remoteIP}, + }, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName1.Port), + Port: ptr.To(int32(svcPort1)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + { + Addresses: []string{localIP}, + NodeName: ptr.To(testNodeName), + }, + { + Addresses: []string{remoteIP}, + }, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName2.Port), + Port: ptr.To(int32(svcPort2)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + ) + + proxier.setInitialized(true) + proxier.syncProxyRules() + + // svc1 and svc2 should have no endpoints now. + assert.Empty(t, proxier.endpointsMap[svcPortName1], + "svc1 should have no endpoints after deletion") + assert.Empty(t, proxier.endpointsMap[svcPortName2], + "svc2 should have no endpoints after deletion") + + // svc3's local endpoint at IP-B should still be present and functional. + var svc3Ep *endpointInfo + for _, ep := range proxier.endpointsMap[svcPortName3] { + epI, ok := ep.(*endpointInfo) + if ok && epI.ip == remoteIP { + svc3Ep = epI + } + } + assert.NotNil(t, svc3Ep, "svc3's local endpoint at IP-B should still exist") + assert.True(t, svc3Ep.IsLocal(), "svc3's endpoint should still be local") + assert.NotEmpty(t, svc3Ep.hnsID, "svc3's local endpoint should still have a valid HNS ID") + + // Verify the local HNS endpoint was NOT deleted from HNS. 
+ _, err := hcnMock.GetEndpointByID(svc3Ep.hnsID) + assert.NoError(t, err, "Local HNS endpoint at IP-B should still exist in HNS (not deleted)") +} + +// TestEndpointTransitionWithLBFailures verifies endpoint reference counting +// when an endpoint set transitions from [A,B,C,D] to [A,B] while load +// balancer delete and create operations both fail. +// +// Flow: +// 1. Create a local endpoint (A) and 3 remote endpoints (B, C, D). +// 2. Two services (svc1, svc2) each reference all 4 endpoints. +// Assert remote endpoint refCounts B=2, C=2, D=2. +// 3. Transition endpoints from [A,B,C,D] to [A,B] while LB delete is +// injected to fail. LB create then also fails because the old LB +// (same VIP:port) was never removed. +// 4. Assert that C and D refCounts drop from 2 to 0. B keeps its existing HNS +// endpoint (same HNS ID) with refCount=2. A remains local with refCount=0. +func TestEndpointTransitionWithLBFailures(t *testing.T) { + proxier := NewFakeProxier(t, testNodeName, netutils.ParseIPSloppy("10.0.0.1"), NETWORK_TYPE_L2BRIDGE, false) + if proxier == nil { + t.Fatal("Failed to create proxier") + } + + localIP := epIpAddressLocal1 // "192.168.4.4" + remoteIPB := epIpAddressRemote // "192.168.2.3" + remoteIPC := "192.168.2.4" + remoteIPD := "192.168.2.5" + + svcIP1 := "10.20.30.41" + svcPort1 := 80 + svcNodePort1 := 3001 + svcPortName1 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + svcIP2 := "10.20.30.42" + svcPort2 := 80 + svcNodePort2 := 3002 + svcPortName2 := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc2"), + Port: "p80", + Protocol: v1.ProtocolTCP, + } + + // Step 1+2: Create two services with a local endpoint (A) and + // three remote endpoints (B, C, D). 
+ makeServiceMap(proxier, + makeTestService(svcPortName1.Namespace, svcPortName1.Name, func(svc *v1.Service) { + svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP1 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName1.Port, + Port: int32(svcPort1), + Protocol: v1.ProtocolTCP, + NodePort: int32(svcNodePort1), + }} + }), + makeTestService(svcPortName2.Namespace, svcPortName2.Name, func(svc *v1.Service) { + svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP2 + svc.Spec.Ports = []v1.ServicePort{{ + Name: svcPortName2.Port, + Port: int32(svcPort2), + Protocol: v1.ProtocolTCP, + NodePort: int32(svcNodePort2), + }} + }), + ) + + populateEndpointSlices(proxier, + makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + {Addresses: []string{localIP}, NodeName: ptr.To(testNodeName)}, + {Addresses: []string{remoteIPB}}, + {Addresses: []string{remoteIPC}}, + {Addresses: []string{remoteIPD}}, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName1.Port), + Port: ptr.To(int32(svcPort1)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + {Addresses: []string{localIP}, NodeName: ptr.To(testNodeName)}, + {Addresses: []string{remoteIPB}}, + {Addresses: []string{remoteIPC}}, + {Addresses: []string{remoteIPD}}, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName2.Port), + Port: ptr.To(int32(svcPort2)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }), + ) + + // Pre-populate the local HNS endpoint (as CNI would create it). 
+ hcnMock := (proxier.hcn).(*fakehcn.HcnMock) + hcnMock.PopulateQueriedEndpoints(endpointLocal1, networkId, localIP, macAddressLocal1, prefixLen) + + proxier.setInitialized(true) + proxier.syncProxyRules() + + // Collect the HNS IDs for B, C, D after the first sync. + // For L2Bridge without overlay, remote endpoints are created sequentially: + // B → EPID-1, C → EPID-2, D → EPID-3 + findRemoteEp := func(svcPortName proxy.ServicePortName, ip string) *endpointInfo { + for _, ep := range proxier.endpointsMap[svcPortName] { + if epI, ok := ep.(*endpointInfo); ok && epI.ip == ip { + return epI + } + } + return nil + } + + epB := findRemoteEp(svcPortName1, remoteIPB) + epC := findRemoteEp(svcPortName1, remoteIPC) + epD := findRemoteEp(svcPortName1, remoteIPD) + epA := findRemoteEp(svcPortName1, localIP) + + assert.NotNil(t, epB, "Expected to find remote endpoint B") + assert.NotNil(t, epC, "Expected to find remote endpoint C") + assert.NotNil(t, epD, "Expected to find remote endpoint D") + assert.NotNil(t, epA, "Expected to find local endpoint A") + + hnsIDB := epB.hnsID + hnsIDC := epC.hnsID + hnsIDD := epD.hnsID + + // Assert all remote endpoint refCounts are 2 (shared by svc1 and svc2). + assert.Equal(t, uint16(2), *proxier.endPointsRefCount[hnsIDB], + "B refCount should be 2") + assert.Equal(t, uint16(2), *proxier.endPointsRefCount[hnsIDC], + "C refCount should be 2") + assert.Equal(t, uint16(2), *proxier.endPointsRefCount[hnsIDD], + "D refCount should be 2") + + // Record the old LB IDs for both services. + svc1Info := proxier.svcPortMap[svcPortName1].(*serviceInfo) + svc2Info := proxier.svcPortMap[svcPortName2].(*serviceInfo) + oldLBID1 := svc1Info.hnsID + oldLBID2 := svc2Info.hnsID + assert.NotEmpty(t, oldLBID1, "svc1 should have a ClusterIP LB after first sync") + assert.NotEmpty(t, oldLBID2, "svc2 should have a ClusterIP LB after first sync") + + // Step 3: Inject LB delete failure and transition endpoints from + // [A,B,C,D] to [A,B]. 
+ hcnMock.ShouldFailDeleteLoadBalancer = true + proxier.setInitialized(false) + + // Build the old and new endpoint slices for the update. + oldSlice1 := makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + {Addresses: []string{localIP}, NodeName: ptr.To(testNodeName)}, + {Addresses: []string{remoteIPB}}, + {Addresses: []string{remoteIPC}}, + {Addresses: []string{remoteIPD}}, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName1.Port), + Port: ptr.To(int32(svcPort1)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }) + newSlice1 := makeTestEndpointSlice(svcPortName1.Namespace, svcPortName1.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + {Addresses: []string{localIP}, NodeName: ptr.To(testNodeName)}, + {Addresses: []string{remoteIPB}}, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName1.Port), + Port: ptr.To(int32(svcPort1)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }) + oldSlice2 := makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + {Addresses: []string{localIP}, NodeName: ptr.To(testNodeName)}, + {Addresses: []string{remoteIPB}}, + {Addresses: []string{remoteIPC}}, + {Addresses: []string{remoteIPD}}, + } + eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName2.Port), + Port: ptr.To(int32(svcPort2)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }) + newSlice2 := makeTestEndpointSlice(svcPortName2.Namespace, svcPortName2.Name, 1, func(eps *discovery.EndpointSlice) { + eps.AddressType = discovery.AddressTypeIPv4 + eps.Endpoints = []discovery.Endpoint{ + {Addresses: []string{localIP}, NodeName: ptr.To(testNodeName)}, + {Addresses: []string{remoteIPB}}, + } + 
eps.Ports = []discovery.EndpointPort{{ + Name: ptr.To(svcPortName2.Port), + Port: ptr.To(int32(svcPort2)), + Protocol: ptr.To(v1.ProtocolTCP), + }} + }) + + proxier.OnEndpointSliceUpdate(oldSlice1, newSlice1) + proxier.OnEndpointSliceUpdate(oldSlice2, newSlice2) + + proxier.mu.Lock() + proxier.endpointSlicesSynced = true + proxier.mu.Unlock() + + proxier.setInitialized(true) + proxier.syncProxyRules() + + // Step 4: Assert the results. + // + // LB delete was injected to fail, so the old LBs are still in HNS. + // LB create naturally fails because the old LBs (same VIP:port) were + // never removed, causing a "port already exists" conflict. + svc1Info = proxier.svcPortMap[svcPortName1].(*serviceInfo) + svc2Info = proxier.svcPortMap[svcPortName2].(*serviceInfo) + + // svcInfo.hnsID should be empty: the old LB could not be deleted and the replacement create failed. + assert.Empty(t, svc1Info.hnsID, "svc1 LB ID should be empty because create failed due to existing LB") + assert.Empty(t, svc2Info.hnsID, "svc2 LB ID should be empty because create failed due to existing LB") + + // The old LBs should still exist in HNS because deletion failed. + lb1, err1 := hcnMock.GetLoadBalancerByID(oldLBID1) + assert.NoError(t, err1, "Old LB for svc1 should still exist in HNS (delete failed)") + assert.NotNil(t, lb1) + lb2, err2 := hcnMock.GetLoadBalancerByID(oldLBID2) + assert.NoError(t, err2, "Old LB for svc2 should still exist in HNS (delete failed)") + assert.NotNil(t, lb2) + + // C and D refCounts should have dropped from 2 to 0. + // The endpointsMapChange callback calls cleanupAllPolicies with the + // OLD endpoint set (since the map hasn't been updated yet at callback time). + // This decrements C and D's refCounts twice (once per service), reaching 0. + // They stay in terminatedEndpoints (not in the new map) and get deleted by + // cleanupTerminatedEndpoints. 
+ assert.Equal(t, uint16(2), *proxier.endPointsRefCount[hnsIDB], + "B refCount should be 2") + assert.Equal(t, uint16(0), *proxier.endPointsRefCount[hnsIDC], + "C refCount should be 0 after transition") + assert.Equal(t, uint16(0), *proxier.endPointsRefCount[hnsIDD], + "D refCount should be 0 after transition") + + // C and D HNS endpoints should have been deleted by cleanupTerminatedEndpoints. + _, errC := hcnMock.GetEndpointByID(hnsIDC) + assert.Error(t, errC, "C HNS endpoint should have been deleted") + _, errD := hcnMock.GetEndpointByID(hnsIDD) + assert.Error(t, errD, "D HNS endpoint should have been deleted") + + epBNew := findRemoteEp(svcPortName1, remoteIPB) + assert.NotNil(t, epBNew, "B endpoint should exist for svc1") + assert.Equal(t, hnsIDB, epBNew.hnsID, + "B should keep the same HNS ID (reused, not recreated)") + assert.Equal(t, uint16(2), *proxier.endPointsRefCount[hnsIDB], + "B refCount should be 2 after reuse by both services") + + // A is local — its private refCount should be 0 (locals are not ref-counted). 
+ epANew := findRemoteEp(svcPortName1, localIP) + assert.NotNil(t, epANew, "Local endpoint A should still exist") + assert.True(t, epANew.IsLocal(), "A should be local") +} diff --git a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/testing/hcnutils_mock.go b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/testing/hcnutils_mock.go index bea3eb1753..85a73884ab 100644 --- a/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/testing/hcnutils_mock.go +++ b/deps/github.com/openshift/kubernetes/pkg/proxy/winkernel/testing/hcnutils_mock.go @@ -34,8 +34,9 @@ var ( ) type HcnMock struct { - supportedFeatures hcn.SupportedFeatures - network *hcn.HostComputeNetwork + supportedFeatures hcn.SupportedFeatures + network *hcn.HostComputeNetwork + ShouldFailDeleteLoadBalancer bool } func (hcnObj HcnMock) generateEndpointGuid() (endpointId string, endpointName string) { @@ -217,6 +218,9 @@ func (hcnObj HcnMock) DeleteLoadBalancer(loadBalancer *hcn.HostComputeLoadBalanc if _, ok := loadbalancerMap[loadBalancer.Id]; !ok { return hcn.LoadBalancerNotFoundError{LoadBalancerId: loadBalancer.Id} } + if hcnObj.ShouldFailDeleteLoadBalancer { + return fmt.Errorf("injected DeleteLoadBalancer failure for %s", loadBalancer.Id) + } delete(loadbalancerMap, loadBalancer.Id) return nil } diff --git a/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one.go b/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one.go index 683bfaed69..fd7d4991d6 100644 --- a/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one.go +++ b/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one.go @@ -1081,6 +1081,10 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo logger.Info("Pod has been assigned to node. Abort adding it back to queue.", "pod", klog.KObj(pod), "node", cachedPod.Spec.NodeName) // We need to call DonePod here because we don't call AddUnschedulableIfNotPresent in this case. 
} else { + if cachedPod.UID != podInfo.Pod.UID { + logger.V(2).Info("Pod was recreated while handling scheduling failure. Skip requeueing and status updates.", "pod", klog.KObj(pod), "oldUID", podInfo.Pod.UID, "newUID", cachedPod.UID) + return + } // As is from SharedInformer, we need to do a DeepCopy() here. // ignore this err since apiserver doesn't properly validate affinity terms // and we can't fix the validation for backwards compatibility. diff --git a/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one_test.go b/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one_test.go index 592ff01a84..3ed925a056 100644 --- a/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one_test.go +++ b/deps/github.com/openshift/kubernetes/pkg/scheduler/schedule_one_test.go @@ -1175,6 +1175,79 @@ func TestSchedulerScheduleOne(t *testing.T) { } } +func TestHandleSchedulingFailureSkipsRecreatedPod(t *testing.T) { + logger, ctx := ktesting.NewTestContext(t) + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + oldPod := st.MakePod().Name("foo").Namespace("ns").UID("old-uid").SchedulerName(testSchedulerName).Obj() + recreatedPod := oldPod.DeepCopy() + recreatedPod.UID = "new-uid" + + client := clientsetfake.NewClientset(recreatedPod) + informerFactory := informers.NewSharedInformerFactory(client, 0) + eventBroadcaster := events.NewBroadcaster(&events.EventSinkImpl{Interface: client.EventsV1()}) + + schedFramework, err := tf.NewFramework(ctx, + []tf.RegisterPluginFunc{ + tf.RegisterQueueSortPlugin(queuesort.Name, queuesort.New), + tf.RegisterBindPlugin(defaultbinder.Name, defaultbinder.New), + }, + testSchedulerName, + frameworkruntime.WithClientSet(client), + frameworkruntime.WithEventRecorder(eventBroadcaster.NewRecorder(scheme.Scheme, testSchedulerName)), + frameworkruntime.WithInformerFactory(informerFactory), + ) + if err != nil { + t.Fatal(err) + } + + ar := metrics.NewMetricsAsyncRecorder(10, time.Second, ctx.Done()) + queue := 
internalqueue.NewSchedulingQueue(nil, informerFactory, internalqueue.WithMetricsRecorder(ar)) + sched := &Scheduler{ + client: client, + SchedulingQueue: queue, + } + + informerFactory.Start(ctx.Done()) + informerFactory.WaitForCacheSync(ctx.Done()) + + queue.Add(logger, oldPod) + popped, err := queue.Pop(logger) + if err != nil { + t.Fatalf("Pop: %v", err) + } + if got := queue.InFlightPods(); !podListContainsPod(got, oldPod) { + t.Fatalf("expected popped pod to be in-flight before failure handling, got %v", got) + } + + nominatingInfo := &fwk.NominatingInfo{NominatingMode: fwk.ModeOverride, NominatedNodeName: "node1"} + sched.handleSchedulingFailure(ctx, schedFramework, popped, fwk.NewStatus(fwk.Unschedulable, "no fit"), nominatingInfo, time.Now()) + + if err := wait.PollUntilContextTimeout(ctx, time.Millisecond, wait.ForeverTestTimeout, false, func(context.Context) (bool, error) { + return len(queue.InFlightPods()) == 0, nil + }); err != nil { + t.Fatalf("in-flight pod was not cleared: %v", queue.InFlightPods()) + } + if got := queue.PodsInBackoffQ(); len(got) != 0 { + t.Fatalf("expected recreated pod to stay out of backoffQ, got %v", got) + } + if got := queue.UnschedulablePods(); len(got) != 0 { + t.Fatalf("expected recreated pod to stay out of unschedulablePods, got %v", got) + } + if got := queue.NominatedPodsForNode("node1"); len(got) != 0 { + t.Fatalf("expected recreated pod to stay out of nominated pods, got %v", got) + } + + updatedPod, err := client.CoreV1().Pods(recreatedPod.Namespace).Get(ctx, recreatedPod.Name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Get pod: %v", err) + } + if diff := cmp.Diff(recreatedPod.Status, updatedPod.Status); diff != "" { + t.Fatalf("expected recreated pod status to remain unchanged (-want,+got):\n%s", diff) + } +} + type constSigPluginConfig struct { name string signature []fwk.SignFragment diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp.go 
b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp.go index f8e2faa097..78d44e4119 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp.go @@ -76,6 +76,7 @@ type CopyOptions struct { ClientConfig *restclient.Config Clientset kubernetes.Interface ExecParentCmdName string + Executor exec.RemoteExecutor args []string @@ -204,6 +205,7 @@ func (o *CopyOptions) Complete(f cmdutil.Factory, cmd *cobra.Command, args []str if cmd.Parent() != nil { o.ExecParentCmdName = cmd.Parent().CommandPath() } + o.Executor = &exec.DefaultRemoteExecutor{} var err error o.Namespace, _, err = f.ToRawKubeConfigLoader().Namespace() @@ -277,7 +279,7 @@ func (o *CopyOptions) checkDestinationIsDir(dest fileSpec) error { }, Command: []string{"test", "-d", dest.File.String()}, - Executor: &exec.DefaultRemoteExecutor{}, + Executor: o.Executor, } ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) @@ -345,7 +347,7 @@ func (o *CopyOptions) copyToPod(src, dest fileSpec, options *exec.ExecOptions) e } options.Command = cmdArr - options.Executor = &exec.DefaultRemoteExecutor{} + options.Executor = o.Executor return o.execute(options) } @@ -391,10 +393,11 @@ func (t *TarPipe) initReadFrom(n uint64) { }, Command: []string{"tar", "cf", "-", t.src.File.String()}, - Executor: &exec.DefaultRemoteExecutor{}, + Executor: t.o.Executor, } if t.o.MaxTries != 0 { - options.Command = []string{"sh", "-c", fmt.Sprintf("tar cf - %s | tail -c+%d", t.src.File, n)} + escapedPath := strings.ReplaceAll(t.src.File.String(), "'", `'\''`) + options.Command = []string{"sh", "-c", fmt.Sprintf("tar cf - '%s' | tail -c+%d", escapedPath, n)} } go func() { diff --git a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp_test.go b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp_test.go index 
b0038d0f3f..95ba4216ac 100644 --- a/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp_test.go +++ b/deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl/pkg/cmd/cp/cp_test.go @@ -19,9 +19,11 @@ package cp import ( "archive/tar" "bytes" + "context" "fmt" "io" "net/http" + "net/url" "os" "path/filepath" "reflect" @@ -33,10 +35,13 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/cli-runtime/pkg/genericiooptions" + restclient "k8s.io/client-go/rest" "k8s.io/client-go/rest/fake" + "k8s.io/client-go/tools/remotecommand" kexec "k8s.io/kubectl/pkg/cmd/exec" cmdtesting "k8s.io/kubectl/pkg/cmd/testing" "k8s.io/kubectl/pkg/scheme" @@ -674,6 +679,113 @@ func TestCopyToPod(t *testing.T) { } } +func TestCopyFromPod(t *testing.T) { + tf := cmdtesting.NewTestFactory().WithNamespace("test") + ns := scheme.Codecs.WithoutConversion() + codec := scheme.Codecs.LegacyCodec(scheme.Scheme.PrioritizedVersionsAllGroups()...) 
+ + tf.Client = &fake.RESTClient{ + GroupVersion: schema.GroupVersion{Group: "", Version: "v1"}, + NegotiatedSerializer: ns, + Client: fake.CreateHTTPClient(func(req *http.Request) (*http.Response, error) { + responsePod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "pod-name", Namespace: "pod-ns"}, + Spec: v1.PodSpec{Containers: []v1.Container{{Name: "container"}}}, + } + return &http.Response{StatusCode: http.StatusOK, Header: cmdtesting.DefaultHeader(), Body: io.NopCloser(bytes.NewReader([]byte(runtime.EncodeOrDie(codec, responsePod))))}, nil + }), + } + + tf.ClientConfigVal = cmdtesting.DefaultClientConfig() + ioStreams, _, _, _ := genericiooptions.NewTestIOStreams() + + cmd := NewCmdCp(tf, ioStreams) + + destDir, err := os.MkdirTemp("", "test") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + defer os.RemoveAll(destDir) + + tests := map[string]struct { + src string + dest string + podName string + retries int + expectedErr string + expectedCommand string + }{ + "copy from pod to empty path": { + src: "pod-ns/pod-name:/tmp/foo", + dest: "", + expectedErr: "filepath can not be empty", + }, + "path without single quotes": { + src: "pod-ns/pod-name:/tmp/foo", + dest: destDir, + podName: "pod-name", + expectedCommand: "tar cf - /tmp/foo", + }, + "path with single quotes": { + src: "pod-ns/pod-name:/tmp/path'with'quotes", + dest: destDir, + podName: "pod-name", + retries: 1, + expectedCommand: `sh -c tar cf - '/tmp/path'\''with'\''quotes' | tail -c+1`, + }, + } + + for name, test := range tests { + opts := NewCopyOptions(ioStreams) + opts.MaxTries = test.retries + if err := opts.Complete(tf, cmd, []string{test.src, test.dest}); err != nil { + t.Fatalf("unexpected error: %v", err) + } + remoteExec := &testingRemoteExecutor{} + opts.Executor = remoteExec + t.Run(name, func(t *testing.T) { + err := opts.Run() + if len(test.expectedErr) > 0 { + if err == nil { + t.Fatalf("expected error but got none") + } + if !strings.Contains(err.Error(), 
test.expectedErr) { + t.Errorf("expected error to contain %q, got: %v", test.expectedErr, err) + } + } + if len(test.expectedErr) == 0 && err != nil { + t.Errorf("unexpected error: %v", err) + } + if !strings.Contains(remoteExec.capturedPath, test.podName) { + t.Errorf("missing pod name %q in the captured path: %q", test.podName, remoteExec.capturedPath) + } + query, err := url.ParseQuery(remoteExec.capturedQuery) + if err != nil { + t.Errorf("unexpected error parsing captured query: %v", err) + } + actualQuery := strings.Join(query["command"], " ") + if actualQuery != test.expectedCommand { + t.Errorf("unexpected command, got %q, expected: %q", actualQuery, test.expectedCommand) + } + }) + } +} + +type testingRemoteExecutor struct { + capturedPath string + capturedQuery string +} + +func (t *testingRemoteExecutor) Execute(url *url.URL, config *restclient.Config, stdin io.Reader, stdout, stderr io.Writer, tty bool, terminalSizeQueue remotecommand.TerminalSizeQueue) error { + return t.ExecuteWithContext(context.Background(), url, config, stdin, stdout, stderr, tty, terminalSizeQueue) +} + +func (t *testingRemoteExecutor) ExecuteWithContext(ctx context.Context, url *url.URL, config *restclient.Config, stdin io.Reader, stdout, stderr io.Writer, tty bool, terminalSizeQueue remotecommand.TerminalSizeQueue) error { + t.capturedPath = url.Path + t.capturedQuery = url.RawQuery + return nil +} + func TestCopyToPodNoPreserve(t *testing.T) { tf := cmdtesting.NewTestFactory().WithNamespace("test") ns := scheme.Codecs.WithoutConversion() diff --git a/etcd/go.mod b/etcd/go.mod index e5439a18ec..c0ff185242 100644 --- a/etcd/go.mod +++ b/etcd/go.mod @@ -10,11 +10,11 @@ require ( github.com/openshift/microshift v0.0.0-00010101000000-000000000000 github.com/spf13/cobra v1.10.2 go.etcd.io/etcd/server/v3 v3.6.5 - k8s.io/apimachinery v1.35.4 - k8s.io/cli-runtime v1.35.4 - k8s.io/component-base v1.35.4 + k8s.io/apimachinery v1.35.5 + k8s.io/cli-runtime v1.35.5 + k8s.io/component-base 
v1.35.5 k8s.io/klog/v2 v2.140.0 - k8s.io/kubectl v1.35.4 + k8s.io/kubectl v1.35.5 sigs.k8s.io/yaml v1.6.0 ) @@ -98,11 +98,11 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - k8s.io/api v1.35.4 // indirect - k8s.io/apiserver v1.35.4 // indirect - k8s.io/client-go v1.35.4 // indirect + k8s.io/api v1.35.5 // indirect + k8s.io/apiserver v1.35.5 // indirect + k8s.io/client-go v1.35.5 // indirect k8s.io/kube-openapi v0.0.0-20260304202019-5b3e3fdb0acf // indirect - k8s.io/kubelet v1.35.4 // indirect + k8s.io/kubelet v1.35.5 // indirect k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 // indirect sigs.k8s.io/kustomize/api v0.20.1 // indirect sigs.k8s.io/kustomize/kyaml v0.20.1 // indirect diff --git a/etcd/vendor/modules.txt b/etcd/vendor/modules.txt index 04ff544a18..54df8af91b 100644 --- a/etcd/vendor/modules.txt +++ b/etcd/vendor/modules.txt @@ -664,7 +664,7 @@ gopkg.in/inf.v0 # gopkg.in/natefinch/lumberjack.v2 v2.2.1 ## explicit; go 1.13 gopkg.in/natefinch/lumberjack.v2 -# k8s.io/api v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/api +# k8s.io/api v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/api ## explicit; go 1.25.0 k8s.io/api/admission/v1 k8s.io/api/admission/v1beta1 @@ -726,7 +726,7 @@ k8s.io/api/storage/v1 k8s.io/api/storage/v1alpha1 k8s.io/api/storage/v1beta1 k8s.io/api/storagemigration/v1beta1 -# k8s.io/apimachinery v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery +# k8s.io/apimachinery v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery ## explicit; go 1.25.0 k8s.io/apimachinery/pkg/api/equality k8s.io/apimachinery/pkg/api/errors @@ -785,18 +785,18 @@ k8s.io/apimachinery/pkg/version k8s.io/apimachinery/pkg/watch k8s.io/apimachinery/third_party/forked/golang/json 
k8s.io/apimachinery/third_party/forked/golang/reflect -# k8s.io/apiserver v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver +# k8s.io/apiserver v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver ## explicit; go 1.25.0 k8s.io/apiserver/pkg/apis/audit k8s.io/apiserver/pkg/apis/audit/v1 k8s.io/apiserver/pkg/authentication/user -# k8s.io/cli-runtime v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime +# k8s.io/cli-runtime v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime ## explicit; go 1.25.0 k8s.io/cli-runtime/pkg/genericclioptions k8s.io/cli-runtime/pkg/genericiooptions k8s.io/cli-runtime/pkg/printers k8s.io/cli-runtime/pkg/resource -# k8s.io/client-go v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go +# k8s.io/client-go v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go ## explicit; go 1.25.0 k8s.io/client-go/applyconfigurations/admissionregistration/v1 k8s.io/client-go/applyconfigurations/admissionregistration/v1alpha1 @@ -951,7 +951,7 @@ k8s.io/client-go/util/homedir k8s.io/client-go/util/jsonpath k8s.io/client-go/util/keyutil k8s.io/client-go/util/workqueue -# k8s.io/component-base v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base +# k8s.io/component-base v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base ## explicit; go 1.25.0 k8s.io/component-base/cli k8s.io/component-base/cli/flag @@ -989,7 +989,7 @@ k8s.io/kube-openapi/pkg/util k8s.io/kube-openapi/pkg/util/proto k8s.io/kube-openapi/pkg/util/proto/validation k8s.io/kube-openapi/pkg/validation/spec -# k8s.io/kubectl v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl +# k8s.io/kubectl v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl ## explicit; go 1.25.0 k8s.io/kubectl/pkg/cmd/util k8s.io/kubectl/pkg/scheme @@ 
-999,7 +999,7 @@ k8s.io/kubectl/pkg/util/openapi k8s.io/kubectl/pkg/util/templates k8s.io/kubectl/pkg/util/term k8s.io/kubectl/pkg/validation -# k8s.io/kubelet v1.35.4 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet +# k8s.io/kubelet v1.35.5 => ../deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet ## explicit; go 1.25.0 k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1 # k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 diff --git a/go.mod b/go.mod index 1157743f80..e8b86f33ed 100644 --- a/go.mod +++ b/go.mod @@ -38,17 +38,17 @@ require ( github.com/prometheus/prometheus v0.302.1 github.com/squat/generic-device-plugin v0.0.0-20251019101956-043a51e18f31 gopkg.in/yaml.v2 v2.4.0 - k8s.io/api v1.35.4 - k8s.io/apiextensions-apiserver v1.35.4 - k8s.io/apimachinery v1.35.4 - k8s.io/apiserver v1.35.4 - k8s.io/cli-runtime v1.35.4 - k8s.io/client-go v1.35.4 - k8s.io/cloud-provider v1.35.4 - k8s.io/component-base v1.35.4 - k8s.io/kube-aggregator v1.35.4 - k8s.io/kubectl v1.35.4 - k8s.io/kubelet v1.35.4 + k8s.io/api v1.35.5 + k8s.io/apiextensions-apiserver v1.35.5 + k8s.io/apimachinery v1.35.5 + k8s.io/apiserver v1.35.5 + k8s.io/cli-runtime v1.35.5 + k8s.io/client-go v1.35.5 + k8s.io/cloud-provider v1.35.5 + k8s.io/component-base v1.35.5 + k8s.io/kube-aggregator v1.35.5 + k8s.io/kubectl v1.35.5 + k8s.io/kubelet v1.35.5 k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 sigs.k8s.io/kube-storage-version-migrator v0.0.6-0.20230721195810-5c8923c5ff96 sigs.k8s.io/kustomize/api v0.20.1 @@ -171,22 +171,22 @@ require ( gopkg.in/go-jose/go-jose.v2 v2.6.3 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect - k8s.io/cluster-bootstrap v1.35.4 // indirect - k8s.io/component-helpers v1.35.4 // indirect - k8s.io/controller-manager v1.35.4 // indirect - k8s.io/cri-api v1.35.4 // indirect - k8s.io/cri-client v1.35.4 // indirect - k8s.io/csi-translation-lib v1.35.4 // indirect - k8s.io/dynamic-resource-allocation v1.35.4 // 
indirect - k8s.io/endpointslice v1.35.4 // indirect - k8s.io/externaljwt v1.35.4 // indirect - k8s.io/kms v1.35.4 // indirect - k8s.io/kube-controller-manager v1.35.4 // indirect - k8s.io/kube-proxy v1.35.4 // indirect - k8s.io/kube-scheduler v1.35.4 // indirect - k8s.io/metrics v1.35.4 // indirect - k8s.io/mount-utils v1.35.4 // indirect - k8s.io/pod-security-admission v1.35.4 // indirect + k8s.io/cluster-bootstrap v1.35.5 // indirect + k8s.io/component-helpers v1.35.5 // indirect + k8s.io/controller-manager v1.35.5 // indirect + k8s.io/cri-api v1.35.5 // indirect + k8s.io/cri-client v1.35.5 // indirect + k8s.io/csi-translation-lib v1.35.5 // indirect + k8s.io/dynamic-resource-allocation v1.35.5 // indirect + k8s.io/endpointslice v1.35.5 // indirect + k8s.io/externaljwt v1.35.5 // indirect + k8s.io/kms v1.35.5 // indirect + k8s.io/kube-controller-manager v1.35.5 // indirect + k8s.io/kube-proxy v1.35.5 // indirect + k8s.io/kube-scheduler v1.35.5 // indirect + k8s.io/metrics v1.35.5 // indirect + k8s.io/mount-utils v1.35.5 // indirect + k8s.io/pod-security-admission v1.35.5 // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.34.0 // indirect sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect @@ -226,7 +226,7 @@ require ( google.golang.org/protobuf v1.36.11 // indirect k8s.io/gengo/v2 v2.0.0-20250922181213-ec3ebc5fd46b // indirect k8s.io/klog/v2 v2.140.0 - k8s.io/kubernetes v1.35.4 + k8s.io/kubernetes v1.35.5 ) replace ( diff --git a/packaging/crio.conf.d/10-microshift_amd64.conf b/packaging/crio.conf.d/10-microshift_amd64.conf index cce33b402a..fc49c78a7a 100644 --- a/packaging/crio.conf.d/10-microshift_amd64.conf +++ b/packaging/crio.conf.d/10-microshift_amd64.conf @@ -2,6 +2,6 @@ # for community builds on top of OKD, this setting has no effect [crio.image] global_auth_file="/etc/crio/openshift-pull-secret" -pause_image = 
"quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:40cf94e00c8e84d6c613e38795d0742bffbec39264acfe450eefd7d26b75c784" +pause_image = "quay.io/openshift-release-dev/ocp-v4.0-art-dev@sha256:f5958e314edca4fee07140d284af68c62fbbb4ae9379ee0504a1ea1018f04068" pause_image_auth_file = "/etc/crio/openshift-pull-secret" pause_command = "/usr/bin/pod" diff --git a/scripts/auto-rebase/changelog.txt b/scripts/auto-rebase/changelog.txt index e69de29bb2..9cbb0b2f97 100644 --- a/scripts/auto-rebase/changelog.txt +++ b/scripts/auto-rebase/changelog.txt @@ -0,0 +1,44 @@ +- cluster-kube-apiserver-operator embedded-component 1741228b58952244aceb8e74d35f918431717191 to fb68eab51544f9dffac9916796723f6cee4faf3c + - 9bac764 2026-05-12T15:08:27-04:00 vendor: bump library-go for atomicdir fsync fix + +- kubernetes embedded-component 6b9ddbe6e09aa225e949baecde4145208bf94c8e to c5ea727d19837f8a22cda5c0ba8899960b7777ff + - c3b2ba200 2026-05-12T17:01:37-04:00 UPSTREAM: : hack/update-vendor.sh, make update and update image + - 6636cbce3 2026-05-12T09:50:49Z Release commit for Kubernetes v1.35.5 + - b9ffe7c06 2026-05-11T11:25:41+02:00 kubeadm: use dedicated ClusterRole for apiserver kubelet client + - 7909267f8 2026-05-06T11:08:39+02:00 kubeadm: skip LocalAPIEndpoint defaulting on worker join + - f79279632 2026-04-29T17:39:56+02:00 kubeadm: use the localAPIEndpoint for all API calls in 'init' + - faa811285 2026-04-28T07:49:50Z kube-proxy: don't do full periodic syncs on large cluster mode + - 943d94194 2026-04-26T09:54:46Z Delete remote endpoint if it has same ip as local endpoint in the system. + - d550d458d 2026-04-26T09:54:46Z Delete remote endpoint if it has same ip as local endpoint in the system. 
+ - 6d314b2fd 2026-04-23T09:15:12+01:00 Add a (*Client) addEndpoint method + - feb7fa111 2026-04-23T09:15:11+01:00 Evaluate etcd cluster health using quorum + - 1a7f0b3de 2026-04-21T10:55:12+02:00 Escape path inside the container + - 6953afbad 2026-04-17T06:40:11Z scheduler: address recreated pod review feedback + - bf14155eb 2026-04-17T06:39:06Z scheduler: skip requeueing recreated pods on scheduling failure + - 55f12e847 2026-04-17T06:36:28Z scheduler: fix inFlightPods leak when pod is recreated during scheduling failure + - 31d47ca37 2026-04-15T18:12:54Z Update CHANGELOG/CHANGELOG-1.35.md for v1.35.4 + - 69dd59d5b 2026-04-01T11:11:34-07:00 podStartSLOduration excludes init container runtime and image pulling time, includes only stateless and immediately schedulable pods + +- machine-config-operator embedded-component 864d1bc04264397caddb7ff1ffa43fc5502aa5e7 to c7abfa0bd88d48bc7a8d8b70d982127d2b6a9158 + - 1c422991 2026-05-14T14:12:56Z e2e: fix up baremetal skew test + - 3f9451c5 2026-05-14T10:09:12Z TestNoReboot should only consider password hash in /etc/shadow + - 267488fe 2026-05-07T06:24:30Z controller: add terminationMessagePolicy to build pod containers + +- kubernetes image-amd64 6b9ddbe6e09aa225e949baecde4145208bf94c8e to c5ea727d19837f8a22cda5c0ba8899960b7777ff + - c3b2ba200 2026-05-12T17:01:37-04:00 UPSTREAM: : hack/update-vendor.sh, make update and update image + - 6636cbce3 2026-05-12T09:50:49Z Release commit for Kubernetes v1.35.5 + - b9ffe7c06 2026-05-11T11:25:41+02:00 kubeadm: use dedicated ClusterRole for apiserver kubelet client + - 7909267f8 2026-05-06T11:08:39+02:00 kubeadm: skip LocalAPIEndpoint defaulting on worker join + - f79279632 2026-04-29T17:39:56+02:00 kubeadm: use the localAPIEndpoint for all API calls in 'init' + - faa811285 2026-04-28T07:49:50Z kube-proxy: don't do full periodic syncs on large cluster mode + - 943d94194 2026-04-26T09:54:46Z Delete remote endpoint if it has same ip as local endpoint in the system. 
+ - d550d458d 2026-04-26T09:54:46Z Delete remote endpoint if it has same ip as local endpoint in the system. + - 6d314b2fd 2026-04-23T09:15:12+01:00 Add a (*Client) addEndpoint method + - feb7fa111 2026-04-23T09:15:11+01:00 Evaluate etcd cluster health using quorum + - 1a7f0b3de 2026-04-21T10:55:12+02:00 Escape path inside the container + - 6953afbad 2026-04-17T06:40:11Z scheduler: address recreated pod review feedback + - bf14155eb 2026-04-17T06:39:06Z scheduler: skip requeueing recreated pods on scheduling failure + - 55f12e847 2026-04-17T06:36:28Z scheduler: fix inFlightPods leak when pod is recreated during scheduling failure + - 31d47ca37 2026-04-15T18:12:54Z Update CHANGELOG/CHANGELOG-1.35.md for v1.35.4 + - 69dd59d5b 2026-04-01T11:11:34-07:00 podStartSLOduration excludes init container runtime and image pulling time, includes only stateless and immediately schedulable pods + diff --git a/scripts/auto-rebase/commits.txt b/scripts/auto-rebase/commits.txt index 10251e6eb0..42acea717b 100644 --- a/scripts/auto-rebase/commits.txt +++ b/scripts/auto-rebase/commits.txt @@ -2,7 +2,7 @@ https://github.com/openshift/api embedded-component 694421e64aeeae4374e7e7394014 https://github.com/openshift/cluster-csi-snapshot-controller-operator embedded-component 108f37f0e378accc322cbeb68136ec500ec35b94 https://github.com/openshift/cluster-dns-operator embedded-component 3d2141182243cde1ec6417bd005c76d29aa88a01 https://github.com/openshift/cluster-ingress-operator embedded-component 52c2b0465ecf1dcdcd57c010b9b15ac28f62a2ba -https://github.com/openshift/cluster-kube-apiserver-operator embedded-component 1741228b58952244aceb8e74d35f918431717191 +https://github.com/openshift/cluster-kube-apiserver-operator embedded-component fb68eab51544f9dffac9916796723f6cee4faf3c https://github.com/openshift/cluster-kube-controller-manager-operator embedded-component 95150ed5dbf11370b4a06e6959c77efa13768561 https://github.com/openshift/cluster-kube-scheduler-operator embedded-component 
51ea59abd057d0cef56b29b8a74efc28411d5427 https://github.com/openshift/cluster-network-operator embedded-component e20b9cb9a0b3bc293e622ef1caf70a813710ffa8 @@ -10,9 +10,9 @@ https://github.com/openshift/cluster-openshift-controller-manager-operator embed https://github.com/openshift/cluster-policy-controller embedded-component 44985a1306411101c84dd5081598fc928b432321 https://github.com/openshift/csi-external-snapshotter embedded-component d1bc3ffaa9759c13a06c2ec61c541342e71bd109 https://github.com/openshift/etcd embedded-component d8d67b8ce849f816d6d23c904098336632e2348f -https://github.com/openshift/kubernetes embedded-component 6b9ddbe6e09aa225e949baecde4145208bf94c8e +https://github.com/openshift/kubernetes embedded-component c5ea727d19837f8a22cda5c0ba8899960b7777ff https://github.com/openshift/kubernetes-kube-storage-version-migrator embedded-component 72835e43c7754356645e41031f3a99926b4d42e6 -https://github.com/openshift/machine-config-operator embedded-component 864d1bc04264397caddb7ff1ffa43fc5502aa5e7 +https://github.com/openshift/machine-config-operator embedded-component c7abfa0bd88d48bc7a8d8b70d982127d2b6a9158 https://github.com/openshift/openshift-controller-manager embedded-component 26d20feae8892f648f5b06ed3f5492fe6ffb4532 https://github.com/openshift/operator-framework-olm embedded-component de6260e53055e10e216ec668d692a35554ab0585 https://github.com/openshift/route-controller-manager embedded-component 624742d93f3a7885cf7f70985f1e23ff60da580d @@ -23,7 +23,7 @@ https://github.com/openshift/csi-external-snapshotter image-amd64 d1bc3ffaa9759c https://github.com/openshift/router image-amd64 896390778ebe15f57f87e6ca78f11c96e64c2652 https://github.com/openshift/kube-rbac-proxy image-amd64 d12e274605248f6c59373240a7eae7a7a357dcb3 https://github.com/openshift/ovn-kubernetes image-amd64 72db5e8d8ec23d4108183a8c67b45cdfdb8057ba -https://github.com/openshift/kubernetes image-amd64 6b9ddbe6e09aa225e949baecde4145208bf94c8e +https://github.com/openshift/kubernetes 
image-amd64 c5ea727d19837f8a22cda5c0ba8899960b7777ff https://github.com/openshift/service-ca-operator image-amd64 9fff46a576033685f66fbb1121eb0a2f134a29be https://github.com/openshift/oc image-arm64 66dee73f66c4f048c333c6a77d76871872027896 https://github.com/openshift/coredns image-arm64 0dded2d232dab43c107b1dab9d0d9fdfd8259622 diff --git a/scripts/auto-rebase/last_rebase.sh b/scripts/auto-rebase/last_rebase.sh index 2cda31c792..545dbf0b95 100755 --- a/scripts/auto-rebase/last_rebase.sh +++ b/scripts/auto-rebase/last_rebase.sh @@ -1,2 +1,2 @@ #!/bin/bash -x -./scripts/auto-rebase/rebase.sh to "registry.ci.openshift.org/ocp/release:4.22.0-0.nightly-2026-05-13-154759" "registry.ci.openshift.org/ocp-arm64/release-arm64:4.22.0-0.nightly-arm64-2026-05-18-015115" +./scripts/auto-rebase/rebase.sh to "registry.ci.openshift.org/ocp/release:4.22.0-0.nightly-2026-05-18-130120" "registry.ci.openshift.org/ocp-arm64/release-arm64:4.22.0-0.nightly-arm64-2026-05-18-195115" diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go index 339231bf4c..bef11db1c3 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kubelet.go @@ -653,6 +653,7 @@ func NewMainKubelet(ctx context.Context, nodeStatusMaxImages: nodeStatusMaxImages, tracer: tracer, nodeStartupLatencyTracker: kubeDeps.NodeStartupLatencyTracker, + podStartupLatencyTracker: kubeDeps.PodStartupLatencyTracker, healthChecker: kubeDeps.HealthChecker, flagz: kubeDeps.Flagz, } @@ -815,10 +816,11 @@ func NewMainKubelet(ctx context.Context, kubeCfg.MemorySwap.SwapBehavior, kubeDeps.ContainerManager.GetNodeAllocatableAbsolute, *kubeCfg.MemoryThrottlingFactor, - kubeDeps.PodStartupLatencyTracker, + klet.podStartupLatencyTracker, kubeDeps.TracerProvider, tokenManager, getServiceAccount, + klet.podStartupLatencyTracker, ) if err != nil { return nil, err @@ -1530,6 +1532,9 @@ type Kubelet struct { // Track node startup latencies 
nodeStartupLatencyTracker util.NodeStartupLatencyTracker + // Track pod startup latencies + podStartupLatencyTracker util.PodStartupLatencyTracker + // Health check kubelet healthChecker watchdog.HealthChecker diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go index 818ca633a1..efb3034107 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -106,6 +106,11 @@ type podStateProvider interface { ShouldPodRuntimeBeRemoved(kubetypes.UID) bool } +type PodInitContainerTimeRecorder interface { + RecordInitContainerStarted(podUID kubetypes.UID, startedAt time.Time) + RecordInitContainerFinished(podUID kubetypes.UID, finishedAt time.Time) +} + type kubeGenericRuntimeManager struct { runtimeName string recorder record.EventRecorder @@ -191,6 +196,9 @@ type kubeGenericRuntimeManager struct { // Swap controller availability check function (Linux only) // Uses sync.OnceValue for lazy initialization getSwapControllerAvailable func() bool + + // Records first initContainer start time and last initContainer finish time + podInitContainerTimeRecorder PodInitContainerTimeRecorder } // KubeGenericRuntime is a interface contains interfaces for container runtime and command. 
@@ -240,6 +248,7 @@ func NewKubeGenericRuntimeManager( tracerProvider trace.TracerProvider, tokenManager *token.Manager, getServiceAccount plugin.GetServiceAccountFunc, + podInitContainerTimeRecorder PodInitContainerTimeRecorder, ) (KubeGenericRuntime, []images.PostImageGCHook, error) { logger := klog.FromContext(ctx) @@ -247,29 +256,30 @@ func NewKubeGenericRuntimeManager( imageService = newInstrumentedImageManagerService(imageService) tracer := tracerProvider.Tracer(instrumentationScope) kubeRuntimeManager := &kubeGenericRuntimeManager{ - recorder: recorder, - singleProcessOOMKill: singleProcessOOMKill, - cpuCFSQuota: cpuCFSQuota, - cpuCFSQuotaPeriod: cpuCFSQuotaPeriod, - seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"), - livenessManager: livenessManager, - readinessManager: readinessManager, - startupManager: startupManager, - machineInfo: machineInfo, - osInterface: osInterface, - runtimeHelper: runtimeHelper, - runtimeService: runtimeService, - imageService: imageService, - containerManager: containerManager, - internalLifecycle: containerManager.InternalContainerLifecycle(), - logManager: logManager, - runtimeClassManager: runtimeClassManager, - logReduction: logreduction.NewLogReduction(identicalErrorDelay), - seccompDefault: seccompDefault, - memorySwapBehavior: memorySwapBehavior, - getNodeAllocatable: getNodeAllocatable, - memoryThrottlingFactor: memoryThrottlingFactor, - podLogsDirectory: podLogsDirectory, + recorder: recorder, + singleProcessOOMKill: singleProcessOOMKill, + cpuCFSQuota: cpuCFSQuota, + cpuCFSQuotaPeriod: cpuCFSQuotaPeriod, + seccompProfileRoot: filepath.Join(rootDirectory, "seccomp"), + livenessManager: livenessManager, + readinessManager: readinessManager, + startupManager: startupManager, + machineInfo: machineInfo, + osInterface: osInterface, + runtimeHelper: runtimeHelper, + runtimeService: runtimeService, + imageService: imageService, + containerManager: containerManager, + internalLifecycle: 
containerManager.InternalContainerLifecycle(), + logManager: logManager, + runtimeClassManager: runtimeClassManager, + logReduction: logreduction.NewLogReduction(identicalErrorDelay), + seccompDefault: seccompDefault, + memorySwapBehavior: memorySwapBehavior, + getNodeAllocatable: getNodeAllocatable, + memoryThrottlingFactor: memoryThrottlingFactor, + podLogsDirectory: podLogsDirectory, + podInitContainerTimeRecorder: podInitContainerTimeRecorder, } // Initialize swap controller availability check with lazy evaluation @@ -1665,6 +1675,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po } return err } + if typeName == "init container" { + // Don't measure restartable init containers (sidecars) + if !podutil.IsRestartableInitContainer(spec.container) { + if m.podInitContainerTimeRecorder != nil { + m.podInitContainerTimeRecorder.RecordInitContainerStarted(pod.UID, time.Now()) + } + } + } return nil } @@ -1694,6 +1712,21 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po logger.V(4).Info("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod)) } + for _, cs := range podStatus.ContainerStatuses { + // Check if this is an init container + for _, init := range pod.Spec.InitContainers { + // Don't measure restartable init containers (sidecars) + if podutil.IsRestartableInitContainer(&init) { + continue + } + if cs.Name == init.Name && cs.State == kubecontainer.ContainerStateExited && !cs.FinishedAt.IsZero() { + if m.podInitContainerTimeRecorder != nil { + m.podInitContainerTimeRecorder.RecordInitContainerFinished(pod.UID, cs.FinishedAt) + } + } + } + } + // Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources if resizable, _, _ := allocation.IsInPlacePodVerticalScalingAllowed(pod); resizable { if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources || 
podContainerChanges.UpdatePodLevelResources { diff --git a/vendor/k8s.io/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go b/vendor/k8s.io/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go index d972815a78..d083464f4d 100644 --- a/vendor/k8s.io/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go +++ b/vendor/k8s.io/kubernetes/pkg/kubelet/util/pod_startup_latency_tracker.go @@ -34,6 +34,8 @@ type PodStartupLatencyTracker interface { ObservedPodOnWatch(pod *v1.Pod, when time.Time) RecordImageStartedPulling(podUID types.UID) RecordImageFinishedPulling(podUID types.UID) + RecordInitContainerStarted(podUID types.UID, startedAt time.Time) + RecordInitContainerFinished(podUID types.UID, finishedAt time.Time) RecordStatusUpdated(pod *v1.Pod) DeletePodStartupState(podUID types.UID) } @@ -42,15 +44,26 @@ type basicPodStartupLatencyTracker struct { // protect against concurrent read and write on pods map lock sync.Mutex pods map[types.UID]*perPodState + // Track pods that were excluded from SLI due to unschedulability + // These pods should never be re-added even if they later become schedulable + excludedPods map[types.UID]bool // metrics for the first network pod only firstNetworkPodSeen bool // For testability clock clock.Clock } +type imagePullSession struct { + start time.Time + end time.Time +} type perPodState struct { - firstStartedPulling time.Time - lastFinishedPulling time.Time + // Session-based image pulling tracking for accurate overlap handling + imagePullSessions []imagePullSession + imagePullSessionsStarts []time.Time // Track multiple concurrent pull starts + // Init container tracking + totalInitContainerRuntime time.Duration + currentInitContainerStart time.Time // first time, when pod status changed into Running observedRunningTime time.Time // log, if pod latency was already Observed @@ -60,8 +73,9 @@ type perPodState struct { // NewPodStartupLatencyTracker creates an instance of PodStartupLatencyTracker func 
NewPodStartupLatencyTracker() PodStartupLatencyTracker { return &basicPodStartupLatencyTracker{ - pods: map[types.UID]*perPodState{}, - clock: clock.RealClock{}, + pods: map[types.UID]*perPodState{}, + excludedPods: map[types.UID]bool{}, + clock: clock.RealClock{}, } } @@ -77,13 +91,29 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim state := p.pods[pod.UID] if state == nil { - // create a new record for pod, only if it was not yet acknowledged by the Kubelet - // this is required, as we want to log metric only for those pods, that where scheduled - // after Kubelet started + // if pod was previously unschedulable, don't track it again + if p.excludedPods[pod.UID] { + return + } + + // create a new record for pod if pod.Status.StartTime.IsZero() { - p.pods[pod.UID] = &perPodState{} + if isPodUnschedulable(pod) { + p.excludedPods[pod.UID] = true + return + } + + // if pod is schedulable then track it + state = &perPodState{} + p.pods[pod.UID] = state } + return + } + // remove existing pods from tracking (this handles cases where scheduling state becomes known later) + if isPodUnschedulable(pod) { + delete(p.pods, pod.UID) + p.excludedPods[pod.UID] = true return } @@ -102,29 +132,74 @@ func (p *basicPodStartupLatencyTracker) ObservedPodOnWatch(pod *v1.Pod, when tim ctx := context.TODO() logger := klog.FromContext(ctx) podStartingDuration := when.Sub(pod.CreationTimestamp.Time) - imagePullingDuration := state.lastFinishedPulling.Sub(state.firstStartedPulling) - podStartSLOduration := (podStartingDuration - imagePullingDuration).Seconds() + podStartSLOduration := podStartingDuration + + totalImagesPullingTime := calculateImagePullingTime(state.imagePullSessions) + if totalImagesPullingTime > 0 { + podStartSLOduration -= totalImagesPullingTime + } + + if state.totalInitContainerRuntime > 0 { + podStartSLOduration -= state.totalInitContainerRuntime + } + + podIsStateful := isStatefulPod(pod) logger.Info("Observed pod startup duration", 
"pod", klog.KObj(pod), - "podStartSLOduration", podStartSLOduration, + "podStartSLOduration", podStartSLOduration.Seconds(), "podStartE2EDuration", podStartingDuration, + "totalImagesPullingTime", totalImagesPullingTime, + "totalInitContainerRuntime", state.totalInitContainerRuntime, + "isStatefulPod", podIsStateful, "podCreationTimestamp", pod.CreationTimestamp.Time, - "firstStartedPulling", state.firstStartedPulling, - "lastFinishedPulling", state.lastFinishedPulling, + "imagePullSessionsCount", len(state.imagePullSessions), + "imagePullSessionsStartsCount", len(state.imagePullSessionsStarts), "observedRunningTime", state.observedRunningTime, "watchObservedRunningTime", when) - metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration) metrics.PodStartTotalDuration.WithLabelValues().Observe(podStartingDuration.Seconds()) + if !podIsStateful { + metrics.PodStartSLIDuration.WithLabelValues().Observe(podStartSLOduration.Seconds()) + // if is the first Pod with network track the start values + // these metrics will help to identify problems with the CNI plugin + if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen { + metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration.Seconds()) + p.firstNetworkPodSeen = true + } + } state.metricRecorded = true - // if is the first Pod with network track the start values - // these metrics will help to identify problems with the CNI plugin - if !pod.Spec.HostNetwork && !p.firstNetworkPodSeen { - metrics.FirstNetworkPodStartSLIDuration.Set(podStartSLOduration) - p.firstNetworkPodSeen = true + } +} + +// calculateImagePullingTime computes the total time spent pulling images, +// accounting for overlapping pull sessions properly +func calculateImagePullingTime(sessions []imagePullSession) time.Duration { + if len(sessions) == 0 { + return 0 + } + + var totalTime time.Duration + var currentEnd time.Time + + for i, session := range sessions { + if session.end.IsZero() { + continue } + + if i == 0 || 
session.start.After(currentEnd) { + // First session or no overlap with previous session + totalTime += session.end.Sub(session.start) + currentEnd = session.end + } else if session.end.After(currentEnd) { + // Partial overlap - add only the non-overlapping part + totalTime += session.end.Sub(currentEnd) + currentEnd = session.end + } + // If session.end <= currentEnd, it's completely overlapped } + + return totalTime } func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.UID) { @@ -136,9 +211,8 @@ func (p *basicPodStartupLatencyTracker) RecordImageStartedPulling(podUID types.U return } - if state.firstStartedPulling.IsZero() { - state.firstStartedPulling = p.clock.Now() - } + now := p.clock.Now() + state.imagePullSessionsStarts = append(state.imagePullSessionsStarts, now) } func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types.UID) { @@ -150,8 +224,48 @@ func (p *basicPodStartupLatencyTracker) RecordImageFinishedPulling(podUID types. return } - if !state.firstStartedPulling.IsZero() { - state.lastFinishedPulling = p.clock.Now() // Now is always grater than values from the past. 
+ now := p.clock.Now() + + // Complete the oldest pull session if we have active starts + if len(state.imagePullSessionsStarts) > 0 { + // Take the first (oldest) start and create a session + startTime := state.imagePullSessionsStarts[0] + session := imagePullSession{ + start: startTime, + end: now, + } + state.imagePullSessions = append(state.imagePullSessions, session) + state.imagePullSessionsStarts = state.imagePullSessionsStarts[1:] + } +} + +func (p *basicPodStartupLatencyTracker) RecordInitContainerStarted(podUID types.UID, startedAt time.Time) { + p.lock.Lock() + defer p.lock.Unlock() + + state := p.pods[podUID] + if state == nil { + return + } + + state.currentInitContainerStart = startedAt +} + +func (p *basicPodStartupLatencyTracker) RecordInitContainerFinished(podUID types.UID, finishedAt time.Time) { + p.lock.Lock() + defer p.lock.Unlock() + + state := p.pods[podUID] + if state == nil { + return + } + + if !state.currentInitContainerStart.IsZero() { + initDuration := finishedAt.Sub(state.currentInitContainerStart) + if initDuration > 0 { + state.totalInitContainerRuntime += initDuration + } + state.currentInitContainerStart = time.Time{} } } @@ -197,9 +311,43 @@ func hasPodStartedSLO(pod *v1.Pod) bool { return true } +// isStatefulPod determines if a pod is stateful according to the SLI documentation: +// "A stateful pod is defined as a pod that mounts at least one volume with sources +// other than secrets, config maps, downward API and empty dir." +// We also include Projected volumes since they are a collection of ephemeral types. 
+// ref: https://github.com/kubernetes/community/blob/master/sig-scalability/slos/pod_startup_latency.md +func isStatefulPod(pod *v1.Pod) bool { + for _, volume := range pod.Spec.Volumes { + // Check if this volume is NOT a stateless/ephemeral type + if volume.Secret == nil && + volume.ConfigMap == nil && + volume.DownwardAPI == nil && + volume.EmptyDir == nil && + volume.Projected == nil { + return true + } + } + return false +} + +// isPodUnschedulable determines if a pod should be excluded from SLI tracking +// according to the SLI definition: "By schedulable pod we mean a pod that has to be +// immediately (without actions from any other components) schedulable in the cluster +// without causing any preemption." +// Any pod with PodScheduled=False is not immediately schedulable and should be excluded. +func isPodUnschedulable(pod *v1.Pod) bool { + for _, condition := range pod.Status.Conditions { + if condition.Type == v1.PodScheduled && condition.Status == v1.ConditionFalse { + return true + } + } + return false +} + func (p *basicPodStartupLatencyTracker) DeletePodStartupState(podUID types.UID) { p.lock.Lock() defer p.lock.Unlock() delete(p.pods, podUID) + delete(p.excludedPods, podUID) } diff --git a/vendor/k8s.io/kubernetes/pkg/scheduler/schedule_one.go b/vendor/k8s.io/kubernetes/pkg/scheduler/schedule_one.go index 683bfaed69..fd7d4991d6 100644 --- a/vendor/k8s.io/kubernetes/pkg/scheduler/schedule_one.go +++ b/vendor/k8s.io/kubernetes/pkg/scheduler/schedule_one.go @@ -1081,6 +1081,10 @@ func (sched *Scheduler) handleSchedulingFailure(ctx context.Context, fwk framewo logger.Info("Pod has been assigned to node. Abort adding it back to queue.", "pod", klog.KObj(pod), "node", cachedPod.Spec.NodeName) // We need to call DonePod here because we don't call AddUnschedulableIfNotPresent in this case. } else { + if cachedPod.UID != podInfo.Pod.UID { + logger.V(2).Info("Pod was recreated while handling scheduling failure. 
Skip requeueing and status updates.", "pod", klog.KObj(pod), "oldUID", podInfo.Pod.UID, "newUID", cachedPod.UID) + return + } // As is from SharedInformer, we need to do a DeepCopy() here. // ignore this err since apiserver doesn't properly validate affinity terms // and we can't fix the validation for backwards compatibility. diff --git a/vendor/modules.txt b/vendor/modules.txt index 49f2ea8378..98a72b8a4f 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1240,7 +1240,7 @@ gopkg.in/yaml.v2 # gopkg.in/yaml.v3 v3.0.1 ## explicit gopkg.in/yaml.v3 -# k8s.io/api v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/api +# k8s.io/api v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/api ## explicit; go 1.25.0 k8s.io/api/admission/v1 k8s.io/api/admission/v1beta1 @@ -1302,7 +1302,7 @@ k8s.io/api/storage/v1 k8s.io/api/storage/v1alpha1 k8s.io/api/storage/v1beta1 k8s.io/api/storagemigration/v1beta1 -# k8s.io/apiextensions-apiserver v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver +# k8s.io/apiextensions-apiserver v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiextensions-apiserver ## explicit; go 1.25.0 k8s.io/apiextensions-apiserver/pkg/apihelpers k8s.io/apiextensions-apiserver/pkg/apis/apiextensions @@ -1349,7 +1349,7 @@ k8s.io/apiextensions-apiserver/pkg/generated/openapi k8s.io/apiextensions-apiserver/pkg/registry/customresource k8s.io/apiextensions-apiserver/pkg/registry/customresource/tableconvertor k8s.io/apiextensions-apiserver/pkg/registry/customresourcedefinition -# k8s.io/apimachinery v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery +# k8s.io/apimachinery v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apimachinery ## explicit; go 1.25.0 k8s.io/apimachinery/pkg/api/equality k8s.io/apimachinery/pkg/api/errors @@ -1431,7 +1431,7 @@ k8s.io/apimachinery/pkg/watch 
k8s.io/apimachinery/third_party/forked/golang/json k8s.io/apimachinery/third_party/forked/golang/netutil k8s.io/apimachinery/third_party/forked/golang/reflect -# k8s.io/apiserver v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver +# k8s.io/apiserver v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/apiserver ## explicit; go 1.25.0 k8s.io/apiserver/pkg/admission k8s.io/apiserver/pkg/admission/configuration @@ -1625,13 +1625,13 @@ k8s.io/apiserver/plugin/pkg/authenticator/token/oidc k8s.io/apiserver/plugin/pkg/authenticator/token/webhook k8s.io/apiserver/plugin/pkg/authorizer/webhook k8s.io/apiserver/plugin/pkg/authorizer/webhook/metrics -# k8s.io/cli-runtime v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime +# k8s.io/cli-runtime v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cli-runtime ## explicit; go 1.25.0 k8s.io/cli-runtime/pkg/genericclioptions k8s.io/cli-runtime/pkg/genericiooptions k8s.io/cli-runtime/pkg/printers k8s.io/cli-runtime/pkg/resource -# k8s.io/client-go v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go +# k8s.io/client-go v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/client-go ## explicit; go 1.25.0 k8s.io/client-go/applyconfigurations k8s.io/client-go/applyconfigurations/admissionregistration/v1 @@ -2000,7 +2000,7 @@ k8s.io/client-go/util/keyutil k8s.io/client-go/util/retry k8s.io/client-go/util/watchlist k8s.io/client-go/util/workqueue -# k8s.io/cloud-provider v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cloud-provider +# k8s.io/cloud-provider v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cloud-provider ## explicit; go 1.25.0 k8s.io/cloud-provider k8s.io/cloud-provider/api @@ -2018,14 +2018,14 @@ k8s.io/cloud-provider/service/helpers k8s.io/cloud-provider/volume k8s.io/cloud-provider/volume/errors k8s.io/cloud-provider/volume/helpers -# 
k8s.io/cluster-bootstrap v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cluster-bootstrap +# k8s.io/cluster-bootstrap v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cluster-bootstrap ## explicit; go 1.25.0 k8s.io/cluster-bootstrap/token/api k8s.io/cluster-bootstrap/token/jws k8s.io/cluster-bootstrap/token/util k8s.io/cluster-bootstrap/util/secrets k8s.io/cluster-bootstrap/util/tokens -# k8s.io/component-base v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base +# k8s.io/component-base v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-base ## explicit; go 1.25.0 k8s.io/component-base/cli k8s.io/component-base/cli/flag @@ -2062,7 +2062,7 @@ k8s.io/component-base/tracing/api/v1 k8s.io/component-base/version k8s.io/component-base/version/verflag k8s.io/component-base/zpages/features -# k8s.io/component-helpers v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-helpers +# k8s.io/component-helpers v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/component-helpers ## explicit; go 1.25.0 k8s.io/component-helpers/apimachinery/lease k8s.io/component-helpers/apps/poddisruptionbudget @@ -2080,7 +2080,7 @@ k8s.io/component-helpers/scheduling/corev1 k8s.io/component-helpers/scheduling/corev1/nodeaffinity k8s.io/component-helpers/storage/ephemeral k8s.io/component-helpers/storage/volume -# k8s.io/controller-manager v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/controller-manager +# k8s.io/controller-manager v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/controller-manager ## explicit; go 1.25.0 k8s.io/controller-manager/app k8s.io/controller-manager/config @@ -2097,22 +2097,22 @@ k8s.io/controller-manager/pkg/informerfactory k8s.io/controller-manager/pkg/leadermigration k8s.io/controller-manager/pkg/leadermigration/config k8s.io/controller-manager/pkg/leadermigration/options -# 
k8s.io/cri-api v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-api +# k8s.io/cri-api v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-api ## explicit; go 1.25.0 k8s.io/cri-api/pkg/apis k8s.io/cri-api/pkg/apis/runtime/v1 k8s.io/cri-api/pkg/errors -# k8s.io/cri-client v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-client +# k8s.io/cri-client v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/cri-client ## explicit; go 1.25.0 k8s.io/cri-client/pkg k8s.io/cri-client/pkg/internal k8s.io/cri-client/pkg/logs k8s.io/cri-client/pkg/util -# k8s.io/csi-translation-lib v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/csi-translation-lib +# k8s.io/csi-translation-lib v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/csi-translation-lib ## explicit; go 1.25.0 k8s.io/csi-translation-lib k8s.io/csi-translation-lib/plugins -# k8s.io/dynamic-resource-allocation v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/dynamic-resource-allocation +# k8s.io/dynamic-resource-allocation v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/dynamic-resource-allocation ## explicit; go 1.25.0 k8s.io/dynamic-resource-allocation/api k8s.io/dynamic-resource-allocation/cel @@ -2125,14 +2125,14 @@ k8s.io/dynamic-resource-allocation/structured/internal/experimental k8s.io/dynamic-resource-allocation/structured/internal/incubating k8s.io/dynamic-resource-allocation/structured/internal/stable k8s.io/dynamic-resource-allocation/structured/schedulerapi -# k8s.io/endpointslice v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/endpointslice +# k8s.io/endpointslice v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/endpointslice ## explicit; go 1.25.0 k8s.io/endpointslice k8s.io/endpointslice/metrics k8s.io/endpointslice/topologycache k8s.io/endpointslice/trafficdist k8s.io/endpointslice/util -# 
k8s.io/externaljwt v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/externaljwt +# k8s.io/externaljwt v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/externaljwt ## explicit; go 1.25.0 k8s.io/externaljwt/apis/v1 # k8s.io/gengo/v2 v2.0.0-20250922181213-ec3ebc5fd46b @@ -2154,13 +2154,13 @@ k8s.io/klog/v2/internal/severity k8s.io/klog/v2/internal/sloghandler k8s.io/klog/v2/internal/verbosity k8s.io/klog/v2/textlogger -# k8s.io/kms v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kms +# k8s.io/kms v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kms ## explicit; go 1.25.0 k8s.io/kms/apis/v1beta1 k8s.io/kms/apis/v2 k8s.io/kms/pkg/service k8s.io/kms/pkg/util -# k8s.io/kube-aggregator v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-aggregator +# k8s.io/kube-aggregator v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-aggregator ## explicit; go 1.25.0 k8s.io/kube-aggregator/pkg/apis/apiregistration k8s.io/kube-aggregator/pkg/apis/apiregistration/install @@ -2193,7 +2193,7 @@ k8s.io/kube-aggregator/pkg/controllers/status/remote k8s.io/kube-aggregator/pkg/registry/apiservice k8s.io/kube-aggregator/pkg/registry/apiservice/etcd k8s.io/kube-aggregator/pkg/registry/apiservice/rest -# k8s.io/kube-controller-manager v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-controller-manager +# k8s.io/kube-controller-manager v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-controller-manager ## explicit; go 1.25.0 k8s.io/kube-controller-manager/config/v1alpha1 # k8s.io/kube-openapi v0.0.0-20260304202019-5b3e3fdb0acf @@ -2227,15 +2227,15 @@ k8s.io/kube-openapi/pkg/validation/spec k8s.io/kube-openapi/pkg/validation/strfmt k8s.io/kube-openapi/pkg/validation/strfmt/bson k8s.io/kube-openapi/pkg/validation/validate -# k8s.io/kube-proxy v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-proxy +# 
k8s.io/kube-proxy v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-proxy ## explicit; go 1.25.0 k8s.io/kube-proxy/config/v1alpha1 -# k8s.io/kube-scheduler v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-scheduler +# k8s.io/kube-scheduler v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kube-scheduler ## explicit; go 1.25.0 k8s.io/kube-scheduler/config/v1 k8s.io/kube-scheduler/extender/v1 k8s.io/kube-scheduler/framework -# k8s.io/kubectl v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl +# k8s.io/kubectl v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubectl ## explicit; go 1.25.0 k8s.io/kubectl/pkg/apps k8s.io/kubectl/pkg/cmd/apiresources @@ -2273,7 +2273,7 @@ k8s.io/kubectl/pkg/util/storage k8s.io/kubectl/pkg/util/templates k8s.io/kubectl/pkg/util/term k8s.io/kubectl/pkg/validation -# k8s.io/kubelet v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet +# k8s.io/kubelet v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/kubelet ## explicit; go 1.25.0 k8s.io/kubelet/config/v1 k8s.io/kubelet/config/v1alpha1 @@ -2296,7 +2296,7 @@ k8s.io/kubelet/pkg/cri/streaming k8s.io/kubelet/pkg/cri/streaming/portforward k8s.io/kubelet/pkg/cri/streaming/remotecommand k8s.io/kubelet/pkg/types -# k8s.io/kubernetes v1.35.4 => ./deps/github.com/openshift/kubernetes +# k8s.io/kubernetes v1.35.5 => ./deps/github.com/openshift/kubernetes ## explicit; go 1.25.0 k8s.io/kubernetes/cmd/kube-apiserver/app k8s.io/kubernetes/cmd/kube-apiserver/app/options @@ -3140,7 +3140,7 @@ k8s.io/kubernetes/third_party/forked/gonum/graph/simple k8s.io/kubernetes/third_party/forked/gonum/graph/traverse k8s.io/kubernetes/third_party/forked/libcontainer/apparmor k8s.io/kubernetes/third_party/forked/libcontainer/utils -# k8s.io/metrics v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/metrics +# k8s.io/metrics v1.35.5 => 
./deps/github.com/openshift/kubernetes/staging/src/k8s.io/metrics ## explicit; go 1.25.0 k8s.io/metrics/pkg/apis/custom_metrics k8s.io/metrics/pkg/apis/custom_metrics/v1beta1 @@ -3155,10 +3155,10 @@ k8s.io/metrics/pkg/client/clientset/versioned/typed/metrics/v1beta1 k8s.io/metrics/pkg/client/custom_metrics k8s.io/metrics/pkg/client/custom_metrics/scheme k8s.io/metrics/pkg/client/external_metrics -# k8s.io/mount-utils v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/mount-utils +# k8s.io/mount-utils v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/mount-utils ## explicit; go 1.25.0 k8s.io/mount-utils -# k8s.io/pod-security-admission v1.35.4 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/pod-security-admission +# k8s.io/pod-security-admission v1.35.5 => ./deps/github.com/openshift/kubernetes/staging/src/k8s.io/pod-security-admission ## explicit; go 1.25.0 k8s.io/pod-security-admission/admission k8s.io/pod-security-admission/admission/api