Skip to content

Commit 786f5cf

Browse files
authored
Update Hyper-V mutating webhook to controller-runtime v0.19 (#546)
* Update Hyper-V mutating webhook to controller-runtime v0.19
  - Migrate webhook from raw kubebuilder to controller-runtime v0.19 API
  - Update Go to 1.24, k8s deps to v0.31.0
  - Fix RuntimeClassName nil check and decoder type in webhook.go
  - Replace standalone deployment.yaml with Helm chart support
  - Make Helm deployment.yaml args and certMountPath configurable
  - Add values-hyperv.yaml for controller-runtime webhook overrides
  - Use existing webhook image (sigwindowstools/hyperv-runtimeclass-mutating-webhook)
* Fix Hyper-V E2E: pin containerd to v2.1.6 and fix webhook scheduling
  - Pin containerd to v2.1.6 when HYPERV=true to avoid SandboxPlatform validation bug in containerd >= 2.2.1 (hcsshim >= v0.14.0-rc.1)
  - Move apply_hyperv_configuration after apply_hpc_webhook to avoid cert-manager conflict
  - Deploy hyperv webhook via Helm chart instead of standalone manifests
  - Fix webhook pod scheduling: untaint/retaint control-plane nodes
* Exclude Helm templates from yamllint
  Helm template files use Go template syntax ({{ }}, {{- }}) which is not valid YAML. yamllint cannot parse these and reports false syntax errors. The helm-chart-validation workflow already validates charts with helm lint.
1 parent 0b964c0 commit 786f5cf

File tree

11 files changed

+270
-1059
lines changed

11 files changed

+270
-1059
lines changed

.yamllint.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
ignore: |
3+
helpers/helm/templates/

capz/run-capz-e2e.sh

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,24 @@ main() {
3030
export WINDOWS_CONTAINERD_URL="${WINDOWS_CONTAINERD_URL:-"https://github.com/containerd/containerd/releases/download/v1.7.16/containerd-1.7.16-windows-amd64.tar.gz"}"
3131
export GMSA="${GMSA:-""}"
3232
export HYPERV="${HYPERV:-""}"
33+
34+
# When WINDOWS_CONTAINERD_URL is "latest" or empty, pin containerd to v2.1.6 for Hyper-V testing to avoid hcsshim SandboxPlatform
35+
# validation bug in containerd >= 2.2.1 (hcsshim >= v0.14.0-rc.1).
36+
#
37+
# Root cause: containerd's config_windows.go sets SandboxIsolation=1 for the
38+
# runhcs-wcow-hypervisor runtime but omits SandboxPlatform, making shim options
39+
# non-empty. hcsshim PR #2473 added strict validation that calls
40+
# platforms.Parse("") on the empty SandboxPlatform, which fails. containerd
41+
# v2.1.6 bundles hcsshim v0.13.0 (pre-dates this validation).
42+
#
43+
# Upstream references:
44+
# - hcsshim validation: https://github.com/microsoft/hcsshim/pull/2473
45+
# - containerd missing default: https://github.com/containerd/containerd/blob/main/internal/cri/config/config_windows.go
46+
if [[ "${HYPERV}" == "true" && ("${WINDOWS_CONTAINERD_URL}" == "latest" || -z "${WINDOWS_CONTAINERD_URL}") ]]; then
47+
export WINDOWS_CONTAINERD_URL="https://github.com/containerd/containerd/releases/download/v2.1.6/containerd-2.1.6-windows-amd64.tar.gz"
48+
log "HYPERV=true: pinning containerd to v2.1.6 to avoid SandboxPlatform bug"
49+
fi
50+
3351
export KPNG="${WINDOWS_KPNG:-""}"
3452
export CALICO_VERSION="${CALICO_VERSION:-"v3.31.0"}"
3553
export TEMPLATE="${TEMPLATE:-"windows-ci.yaml"}"
@@ -66,8 +84,6 @@ main() {
6684
wait_for_nodes
6785
ensure_cloud_provider_taint_on_windows_nodes
6886
wait_for_windows_machinedeployment
69-
if [[ "${HYPERV}" == "true" ]]; then apply_hyperv_configuration; fi
70-
7187
if [[ ${#post_command[@]} -gt 0 ]]; then
7288
local exit_code
7389
log "post command detected; skipping default e2e tests"
@@ -77,6 +93,7 @@ main() {
7793
fi
7894

7995
apply_hpc_webhook
96+
if [[ "${HYPERV}" == "true" ]]; then apply_hyperv_configuration; fi
8097
run_e2e_test
8198
}
8299

@@ -476,41 +493,25 @@ apply_hpc_webhook(){
476493
}
477494

478495
apply_hyperv_configuration(){
479-
set -x
480496
log "applying configuration for testing hyperv isolated containers"
481497

482-
log "installing hyperv runtime class"
483-
kubectl apply -f "${SCRIPT_ROOT}/../helpers/hyper-v-mutating-webhook/hyperv-runtimeclass.yaml"
484-
485-
# ensure cert-manager and webhook pods land on Linux nodes
498+
# ensure webhook pod lands on Linux control-plane node
486499
log "untainting control-plane nodes"
487500
mapfile -t cp_nodes < <(kubectl get nodes | grep control-plane | awk '{print $1}')
488501
kubectl taint nodes "${cp_nodes[@]}" node-role.kubernetes.io/control-plane:NoSchedule- || true
489502

490-
log "tainting windows nodes"
491-
mapfile -t windows_nodes < <(kubectl get nodes -o wide | grep Windows | awk '{print $1}')
492-
kubectl taint nodes "${windows_nodes[@]}" os=windows:NoSchedule
493-
494-
log "installing cert-manager"
495-
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.5/cert-manager.yaml
496-
497-
log "wait for cert-manager pods to start"
498-
timeout 5m kubectl wait --for=condition=ready pod --all -n cert-manager --timeout -1s
499-
500-
log "installing admission controller webhook"
501-
kubectl apply -f "${SCRIPT_ROOT}/../helpers/hyper-v-mutating-webhook/deployment.yaml"
502-
503-
log "wait for webhook pods to start"
504-
timeout 5m kubectl wait --for=condition=ready pod --all -n hyperv-webhook-system --timeout -1s
503+
log "installing hyperv webhook via helm"
504+
"$TOOLS_BIN_DIR"/helm install hyperv-webhook "${SCRIPT_ROOT}/../helpers/helm" \
505+
-f "${SCRIPT_ROOT}/../helpers/helm/values-hyperv.yaml" \
506+
--create-namespace
505507

506-
log "untainting Windows agent nodes"
507-
kubectl taint nodes "${windows_nodes[@]}" os=windows:NoSchedule-
508+
log "wait for hyperv webhook pods to start"
509+
timeout 5m kubectl wait --for=condition=ready pod --all -n hyperv-webhook --timeout -1s
508510

509511
log "tainting control-plane nodes again"
510512
kubectl taint nodes "${cp_nodes[@]}" node-role.kubernetes.io/control-plane:NoSchedule || true
511513

512514
log "done configuring testing for hyperv isolated containers"
513-
set +x
514515
}
515516

516517
run_post_command() {

helpers/helm/templates/deployment.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ spec:
3939
{{- toYaml .Values.deployment.securityContext | nindent 12 }}
4040
image: "{{ include "webhook.imageRepository" . }}:{{ .Values.deployment.image.tag | default .Chart.AppVersion }}"
4141
imagePullPolicy: {{ .Values.deployment.image.pullPolicy }}
42+
{{- with .Values.deployment.args }}
4243
args:
43-
- --tls-cert-file=/etc/webhook/certs/tls.crt
44-
- --tls-private-key-file=/etc/webhook/certs/tls.key
45-
- --port={{ .Values.deployment.service.targetPort }}
44+
{{- toYaml . | nindent 10 }}
45+
{{- end }}
4646
ports:
4747
- name: webhook
4848
containerPort: {{ .Values.deployment.service.targetPort }}
@@ -58,7 +58,7 @@ spec:
5858
{{- end }}
5959
volumeMounts:
6060
- name: webhook-certs
61-
mountPath: /etc/webhook/certs
61+
mountPath: {{ .Values.deployment.certMountPath | default "/etc/webhook/certs" }}
6262
readOnly: true
6363
{{- with .Values.deployment.livenessProbe }}
6464
livenessProbe:

helpers/helm/templates/mutatingwebhookconfiguration.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ webhooks:
1616
service:
1717
name: {{ include "webhook.fullname" . }}
1818
namespace: {{ include "webhook.namespace" . }}
19-
path: "/mutate"
19+
path: {{ .Values.webhookConfiguration.path | default "/mutate" | quote }}
2020
port: {{ .Values.deployment.service.port }}
2121
{{- if not .Values.certificate.useCertManager }}
2222
caBundle: {{ .Values.certificate.manual.caCert | b64enc }}

helpers/helm/values-hyperv.yaml

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,43 @@ webhookType: hyperv
1010

1111
deployment:
1212
image:
13-
# Uses default registry from values.yaml: sigwindowstools
14-
# Override with: --set deployment.image.registry=myregistry.io/myorg
13+
repository: sigwindowstools/hyperv-runtimeclass-mutating-webhook
1514
tag: "" # Uses chart appVersion by default, override with --set deployment.image.tag=$VERSION
1615

16+
# controller-runtime manages TLS and flags internally, no args needed
17+
args: []
18+
19+
# controller-runtime default cert directory
20+
certMountPath: /tmp/k8s-webhook-server/serving-certs
21+
22+
service:
23+
targetPort: 9443
24+
25+
# controller-runtime serves health checks on a separate HTTP port
26+
livenessProbe:
27+
httpGet:
28+
path: /healthz
29+
port: 8081
30+
scheme: HTTP
31+
initialDelaySeconds: 15
32+
periodSeconds: 20
33+
timeoutSeconds: 1
34+
successThreshold: 1
35+
failureThreshold: 3
36+
37+
readinessProbe:
38+
httpGet:
39+
path: /readyz
40+
port: 8081
41+
scheme: HTTP
42+
initialDelaySeconds: 5
43+
periodSeconds: 10
44+
timeoutSeconds: 1
45+
successThreshold: 1
46+
failureThreshold: 3
47+
1748
webhookConfiguration:
49+
path: /mutate-v1-pod
1850
objectSelector:
1951
matchLabels:
2052
hyperv-isolation: "true"

helpers/helm/values.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ deployment:
3434
tag: "" # If empty, defaults to chart appVersion
3535

3636
imagePullSecrets: []
37+
38+
# Container args passed to the webhook binary
39+
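# Default args are for the HPC webhook pattern (standalone TLS server); --port is presumably expected to match deployment.service.targetPort, so keep the two in sync when overriding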
40+
# Override in values-hyperv.yaml for controller-runtime based webhooks
41+
args:
42+
- --tls-cert-file=/etc/webhook/certs/tls.crt
43+
- --tls-private-key-file=/etc/webhook/certs/tls.key
44+
- --port=8443
45+
46+
# Path where TLS certificates are mounted in the container
47+
certMountPath: /etc/webhook/certs
3748

3849
service:
3950
type: ClusterIP

0 commit comments

Comments
 (0)