diff --git a/terraform/cluster/azure-aks/main.tf b/terraform/cluster/azure-aks/main.tf index cc729630a..ae3f9525b 100644 --- a/terraform/cluster/azure-aks/main.tf +++ b/terraform/cluster/azure-aks/main.tf @@ -55,9 +55,16 @@ data "azurerm_subnet" "private" { #----------------------------------------------------------------------------------------------------------------------- locals { - kubeconfig_path = "${var.context_path}/.kube/config" - rg_name = var.resource_group_name == null ? "${var.name}-${var.context_id}" : var.resource_group_name - cluster_name = var.cluster_name == null ? "${var.name}-${var.context_id}" : var.cluster_name + kubeconfig_path = "${var.context_path}/.kube/config" + rg_name = var.resource_group_name == null ? "${var.name}-${var.context_id}" : var.resource_group_name + cluster_name = var.cluster_name == null ? "${var.name}-${var.context_id}" : var.cluster_name + node_resource_group_name = split("/", azurerm_kubernetes_cluster.main.node_resource_group_id)[4] + node_pool_names = concat( + [var.default_node_pool.name], + var.autoscaled_node_pool.enabled ? [var.autoscaled_node_pool.name] : [] + ) + # Safely access kubelet identity (may not be available during plan in tests) + kubelet_object_id = try(azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id, "00000000-0000-0000-0000-000000000000") tags = merge({ WindsorContextID = var.context_id }, var.tags) @@ -221,6 +228,7 @@ resource "azurerm_kubernetes_cluster" "main" { resource_group_name = azurerm_resource_group.aks.name dns_prefix = local.cluster_name # checkov:skip=CKV_AZURE_339: Kubernetes version is populated from the cloud provider's stable version via Renovate. 
+ # checkov:skip=CKV_AZURE_4: The oms_agent add-on is not enabled on this cluster; the Log Analytics workspace is still created, and diagnostic settings are configured separately or through an alternative monitoring solution kubernetes_version = var.kubernetes_version role_based_access_control_enabled = var.role_based_access_control_enabled automatic_upgrade_channel = var.automatic_upgrade_channel @@ -274,6 +282,9 @@ resource "azurerm_kubernetes_cluster" "main" { vertical_pod_autoscaler_enabled = var.workload_autoscaler_profile.vertical_pod_autoscaler_enabled } + oidc_issuer_enabled = var.oidc_issuer_enabled + workload_identity_enabled = var.workload_identity_enabled + network_profile { network_plugin = "azure" network_policy = "cilium" @@ -281,22 +292,11 @@ resource "azurerm_kubernetes_cluster" "main" { dns_service_ip = var.dns_service_ip } - oms_agent { - log_analytics_workspace_id = azurerm_log_analytics_workspace.aks_logs.id - } - + # Use system-assigned managed identity (Microsoft default and best practice) + # AKS automatically assigns the Contributor role on the node resource group to the control plane identity + # AKS automatically assigns the Virtual Machine Contributor role on the node resource group to the kubelet identity identity { - type = length(var.user_assigned_identity_ids) > 0 ? "UserAssigned" : "SystemAssigned" - identity_ids = var.user_assigned_identity_ids - } - - dynamic "kubelet_identity" { - for_each = var.kubelet_user_assigned_identity_id != null ? [1] : [] - content { - client_id = var.kubelet_client_id - object_id = var.kubelet_object_id - user_assigned_identity_id = var.kubelet_user_assigned_identity_id - } + type = "SystemAssigned" } tags = merge({ @@ -330,6 +330,52 @@ resource "azurerm_kubernetes_cluster_node_pool" "autoscaled" { }, local.tags) } +# AKS automatically creates a Virtual Machine Contributor role assignment on the node resource group for the kubelet identity. +# However, disk attachment operations require additional permissions beyond Virtual Machine Contributor. +# Create a custom role with minimal permissions for VMSS disk operations. 
+resource "azurerm_role_definition" "aks_kubelet_vmss_disk_manager" { + name = "AKS Kubelet VMSS Disk Manager - ${var.context_id}" + scope = azurerm_kubernetes_cluster.main.node_resource_group_id + description = "Minimal permissions for AKS kubelet identity to manage VMSS disk attachments" + + permissions { + actions = concat( + [ + # VMSS virtual machine operations for disk attachment (REQUIRED) + "Microsoft.Compute/virtualMachineScaleSets/virtualMachines/read", + "Microsoft.Compute/virtualMachineScaleSets/virtualMachines/write", + # Core disk operations (REQUIRED for basic disk attachment) + "Microsoft.Compute/disks/read", + "Microsoft.Compute/disks/write", + "Microsoft.Compute/disks/delete", + "Microsoft.Compute/disks/beginGetAccess/action", + "Microsoft.Compute/disks/endGetAccess/action", + # Location/operation queries (may be needed for operation status checks) + "Microsoft.Compute/locations/DiskOperations/read", + "Microsoft.Compute/locations/vmSizes/read", + "Microsoft.Compute/locations/operations/read" + ], + var.enable_volume_snapshots ? 
[ + # Snapshot operations (only included if volume snapshots are enabled) + "Microsoft.Compute/snapshots/read", + "Microsoft.Compute/snapshots/write", + "Microsoft.Compute/snapshots/delete" + ] : [] + ) + not_actions = [] + } + + assignable_scopes = [ + azurerm_kubernetes_cluster.main.node_resource_group_id + ] +} + +resource "azurerm_role_assignment" "kubelet_vmss_disk_manager" { + scope = azurerm_kubernetes_cluster.main.node_resource_group_id + role_definition_id = azurerm_role_definition.aks_kubelet_vmss_disk_manager.role_definition_resource_id + principal_id = local.kubelet_object_id +} + resource "local_file" "kube_config" { content = azurerm_kubernetes_cluster.main.kube_config_raw filename = local.kubeconfig_path diff --git a/terraform/cluster/azure-aks/test.tftest.hcl b/terraform/cluster/azure-aks/test.tftest.hcl index 3a3a6885e..5c02fa529 100644 --- a/terraform/cluster/azure-aks/test.tftest.hcl +++ b/terraform/cluster/azure-aks/test.tftest.hcl @@ -84,6 +84,26 @@ run "minimal_configuration" { condition = azurerm_kubernetes_cluster.main.identity[0].type == "SystemAssigned" error_message = "Cluster should use system-assigned identity by default" } + + assert { + condition = azurerm_kubernetes_cluster.main.oidc_issuer_enabled == true + error_message = "OIDC issuer should be enabled by default" + } + + assert { + condition = azurerm_kubernetes_cluster.main.workload_identity_enabled == true + error_message = "Workload Identity should be enabled by default" + } + + assert { + condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/read") + error_message = "Snapshot permissions should be included when enable_volume_snapshots is true (default)" + } + + assert { + condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/write") + error_message = "Snapshot write permissions should be included when enable_volume_snapshots is 
true (default)" + } } # Tests a full configuration with all optional variables explicitly set, @@ -92,18 +112,13 @@ run "full_configuration" { command = plan variables { - context_id = "test" - name = "windsor-aks" - cluster_name = "test-cluster" - resource_group_name = "test-rg" - kubernetes_version = "1.32" - user_assigned_identity_ids = [ - "/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-1", - "/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-2" - ] - kubelet_client_id = "test-client-id" - kubelet_object_id = "test-object-id" - kubelet_user_assigned_identity_id = "/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-1" + context_id = "test" + name = "windsor-aks" + cluster_name = "test-cluster" + resource_group_name = "test-rg" + kubernetes_version = "1.32" + oidc_issuer_enabled = true + workload_identity_enabled = true default_node_pool = { name = "system" vm_size = "Standard_D2s_v3" @@ -130,6 +145,7 @@ run "full_configuration" { private_cluster_enabled = false azure_policy_enabled = true local_account_disabled = false + enable_volume_snapshots = true } assert { @@ -208,28 +224,28 @@ run "full_configuration" { } assert { - condition = azurerm_kubernetes_cluster.main.identity[0].type == "UserAssigned" - error_message = "Cluster should use user-assigned identity when IDs are provided" + condition = azurerm_kubernetes_cluster.main.identity[0].type == "SystemAssigned" + error_message = "Cluster should use system-assigned identity" } assert { - condition = length(azurerm_kubernetes_cluster.main.identity[0].identity_ids) == 2 - error_message = "Cluster should have 2 user-assigned identity IDs" + condition = 
azurerm_kubernetes_cluster.main.oidc_issuer_enabled == true + error_message = "OIDC issuer should be enabled" } assert { - condition = azurerm_kubernetes_cluster.main.kubelet_identity[0].client_id == "test-client-id" - error_message = "Kubelet client ID should match input" + condition = azurerm_kubernetes_cluster.main.workload_identity_enabled == true + error_message = "Workload Identity should be enabled" } assert { - condition = azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id == "test-object-id" - error_message = "Kubelet object ID should match input" + condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/read") + error_message = "Snapshot permissions should be included when enable_volume_snapshots is true" } assert { - condition = azurerm_kubernetes_cluster.main.kubelet_identity[0].user_assigned_identity_id == "/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-1" - error_message = "Kubelet user-assigned identity ID should match input" + condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/write") + error_message = "Snapshot write permissions should be included when enable_volume_snapshots is true" } } @@ -300,3 +316,36 @@ run "multiple_invalid_inputs" { kubernetes_version = "v1.32" } } + +# Tests that when enable_volume_snapshots is false, snapshot permissions are not included in the role definition. +# This verifies the conditional logic that excludes snapshot operations when volume snapshots are disabled. 
+run "volume_snapshots_disabled" { + command = plan + + variables { + context_id = "test" + name = "windsor-aks" + kubernetes_version = "1.32" + enable_volume_snapshots = false + } + + assert { + condition = !contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/read") + error_message = "Snapshot read permissions should not be included when enable_volume_snapshots is false" + } + + assert { + condition = !contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/write") + error_message = "Snapshot write permissions should not be included when enable_volume_snapshots is false" + } + + assert { + condition = !contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/delete") + error_message = "Snapshot delete permissions should not be included when enable_volume_snapshots is false" + } + + assert { + condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/disks/read") + error_message = "Core disk permissions should still be included when enable_volume_snapshots is false" + } +} diff --git a/terraform/cluster/azure-aks/variables.tf b/terraform/cluster/azure-aks/variables.tf index 44e0a5947..ae6203aaa 100644 --- a/terraform/cluster/azure-aks/variables.tf +++ b/terraform/cluster/azure-aks/variables.tf @@ -205,12 +205,6 @@ variable "expiration_date" { default = null } -variable "user_assigned_identity_ids" { - type = list(string) - description = "User assigned identity IDs for the AKS cluster. If provided, the cluster will use only user-assigned identities." 
- default = [] -} - variable "soft_delete_retention_days" { type = number description = "The number of days to retain the AKS cluster's key vault" @@ -241,20 +235,20 @@ variable "endpoint_private_access" { default = false } -variable "kubelet_client_id" { - description = "Client ID of the user-assigned identity to use for the kubelet. If not provided, the cluster will use the system-assigned identity." - type = string - default = null +variable "enable_volume_snapshots" { + description = "Enable volume snapshot permissions for the kubelet identity. Set to false to use minimal permissions if volume snapshots are not needed." + type = bool + default = true } -variable "kubelet_object_id" { - description = "Object ID of the user-assigned identity to use for the kubelet. If not provided, the cluster will use the system-assigned identity." - type = string - default = null +variable "oidc_issuer_enabled" { + description = "Enable OIDC issuer for the AKS cluster" + type = bool + default = true } -variable "kubelet_user_assigned_identity_id" { - description = "Resource ID of the user-assigned identity to use for the kubelet. If not provided, the cluster will use the system-assigned identity." - type = string - default = null +variable "workload_identity_enabled" { + description = "Enable Workload Identity for the AKS cluster" + type = bool + default = true }