From ddf272808e73a4b2d5f9c6a4c2020d914fdefa41 Mon Sep 17 00:00:00 2001
From: Yan Sun <Yan.Sun3@amd.com>
Date: Sat, 11 Apr 2026 11:46:13 -0700
Subject: [PATCH] Fix helm e2e test for remediation workflow ConfigMapImage
 field (#1329)

The test "upgrade with rendering spec.remediationWorkflow" was failing
because its expected DeviceConfigSpec did not include the ConfigMapImage field.
The helm chart sets a default value for configMapImage in values.yaml,
which gets rendered into the DeviceConfig CR.

Updated the expected spec to include the ConfigMapImage field with the
default value from the helm chart (an empty string).

Also added E2E_ANR_CONFIGMAP_IMAGE to the e2e test Makefile for
consistency with other configurable image variables.

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
(cherry picked from commit d24f89f246581892a281bc689d9107a0917438f2)
---
 hack/k8s-patch/metadata-patch/values.yaml | 2 ++
 helm-charts-k8s/README.md                 | 3 +--
 helm-charts-k8s/values.yaml               | 2 ++
 tests/e2e/Makefile                        | 2 ++
 tests/helm-e2e/helm_e2e_test.go           | 1 +
 5 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/hack/k8s-patch/metadata-patch/values.yaml b/hack/k8s-patch/metadata-patch/values.yaml
index 195901c8..ce550d24 100644
--- a/hack/k8s-patch/metadata-patch/values.yaml
+++ b/hack/k8s-patch/metadata-patch/values.yaml
@@ -300,6 +300,8 @@ deviceConfig:
       nodeDrainPolicy: {}
       # -- Enable/disable automatic workflow start on node issues
       autoStartWorkflow: true
+      # -- Container image used to create the remediation ConfigMap. This image contains the default remediation ConfigMap configmap.yaml file.
+      configMapImage: ""
 # AMD GPU operator controller related configs
 controllerManager:
   manager:
diff --git a/helm-charts-k8s/README.md b/helm-charts-k8s/README.md
index 09885c1b..8553d92f 100644
--- a/helm-charts-k8s/README.md
+++ b/helm-charts-k8s/README.md
@@ -123,8 +123,6 @@ For bugs and feature requests, please file an issue on our [GitHub Issues](https
 
 The AMD GPU Operator is licensed under the [Apache License 2.0](LICENSE).
 
-## gpu-operator-charts
-
 ![Version: v0.0.1](https://img.shields.io/badge/Version-v0.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: dev](https://img.shields.io/badge/AppVersion-dev-informational?style=flat-square)
 
 AMD GPU Operator simplifies the deployment and management of AMD Instinct GPU accelerators within Kubernetes clusters.
@@ -253,6 +251,7 @@ Kubernetes: `>= 1.29.0-0`
 | deviceConfig.spec.metricsExporter.upgradePolicy.upgradeStrategy | string | `"RollingUpdate"` | the type of daemonset upgrade, RollingUpdate or OnDelete |
 | deviceConfig.spec.remediationWorkflow.autoStartWorkflow | bool | `true` | Enable/disable automatic workflow start on node issues |
 | deviceConfig.spec.remediationWorkflow.config | object | `{}` | Configuration for remediation workflow |
+| deviceConfig.spec.remediationWorkflow.configMapImage | string | `""` | Container image used to create the remediation ConfigMap. This image contains the default remediation ConfigMap configmap.yaml file. |
 | deviceConfig.spec.remediationWorkflow.enable | bool | `false` | enable/disable remediation workflow controller |
 | deviceConfig.spec.remediationWorkflow.maxParallelWorkflows | int | `0` | Set maximum number of remediation workflows that can run in parallel. Default is 0 which means no limit |
 | deviceConfig.spec.remediationWorkflow.nodeDrainPolicy | object | `{}` | Policy for draining nodes during remediation |
diff --git a/helm-charts-k8s/values.yaml b/helm-charts-k8s/values.yaml
index 195901c8..ce550d24 100644
--- a/helm-charts-k8s/values.yaml
+++ b/helm-charts-k8s/values.yaml
@@ -300,6 +300,8 @@ deviceConfig:
       nodeDrainPolicy: {}
       # -- Enable/disable automatic workflow start on node issues
       autoStartWorkflow: true
+      # -- Container image used to create the remediation ConfigMap. This image contains the default remediation ConfigMap configmap.yaml file.
+      configMapImage: ""
 # AMD GPU operator controller related configs
 controllerManager:
   manager:
diff --git a/tests/e2e/Makefile b/tests/e2e/Makefile
index 3dea053a..0f5ff001 100644
--- a/tests/e2e/Makefile
+++ b/tests/e2e/Makefile
@@ -18,6 +18,7 @@ E2E_TEST_RUNNER_IMAGE ?= rocm/test-runner:v1.4.0
 E2E_KUBEVIRT_DEVICE_PLUGIN_IMAGE ?= rocm/k8s-device-plugin:latest
 E2E_KUBEVIRT_NODE_LABELLER_IMAGE ?= rocm/k8s-device-plugin:labeller-latest
 E2E_UTILS_CONTAINER_IMAGE ?= docker.io/rocm/gpu-operator-utils:v1.4.0
+E2E_ANR_CONFIGMAP_IMAGE ?= docker.io/rocm/amd-gpu-operator-remediation-config-utils:latest
 E2E_NODE_DIAG_IMAGE ?= busybox:1.36
 E2E_DRA_DRIVER_IMAGE ?= rocm/k8s-gpu-dra-driver:latest
 
@@ -38,6 +39,7 @@ export E2E_AGFHC_TEST_RUNNER_IMAGE
 export E2E_KUBEVIRT_DEVICE_PLUGIN_IMAGE
 export E2E_KUBEVIRT_NODE_LABELLER_IMAGE
 export E2E_UTILS_CONTAINER_IMAGE
+export E2E_ANR_CONFIGMAP_IMAGE
 export E2E_NODE_DIAG_IMAGE
 export E2E_DRA_DRIVER_IMAGE
 
diff --git a/tests/helm-e2e/helm_e2e_test.go b/tests/helm-e2e/helm_e2e_test.go
index 8aed527a..162159d3 100644
--- a/tests/helm-e2e/helm_e2e_test.go
+++ b/tests/helm-e2e/helm_e2e_test.go
@@ -1002,6 +1002,7 @@ deviceConfig:
 					TtlForFailedWorkflows: "36h",
 					TesterImage:           "test.io/test/remediation-workflow-tester:v1.3.0",
 					AutoStartWorkflow:     &boolTrue,
+					ConfigMapImage:        "",
 				},
 			},
 			verifyFunc: s.verifyRemediationWorkflow,