Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 64 additions & 18 deletions terraform/cluster/azure-aks/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,16 @@ data "azurerm_subnet" "private" {
#-----------------------------------------------------------------------------------------------------------------------

locals {
kubeconfig_path = "${var.context_path}/.kube/config"
rg_name = var.resource_group_name == null ? "${var.name}-${var.context_id}" : var.resource_group_name
cluster_name = var.cluster_name == null ? "${var.name}-${var.context_id}" : var.cluster_name
kubeconfig_path = "${var.context_path}/.kube/config"
rg_name = var.resource_group_name == null ? "${var.name}-${var.context_id}" : var.resource_group_name
cluster_name = var.cluster_name == null ? "${var.name}-${var.context_id}" : var.cluster_name
node_resource_group_name = split("/", azurerm_kubernetes_cluster.main.node_resource_group_id)[4]
node_pool_names = concat(
[var.default_node_pool.name],
var.autoscaled_node_pool.enabled ? [var.autoscaled_node_pool.name] : []
)
# Safely access kubelet identity (may not be available during plan in tests)
kubelet_object_id = try(azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id, "00000000-0000-0000-0000-000000000000")
tags = merge({
WindsorContextID = var.context_id
}, var.tags)
Expand Down Expand Up @@ -221,6 +228,7 @@ resource "azurerm_kubernetes_cluster" "main" {
resource_group_name = azurerm_resource_group.aks.name
dns_prefix = local.cluster_name
# checkov:skip=CKV_AZURE_339: Kubernetes version is populated from the cloud provider's stable version via Renovate.
# checkov:skip=CKV_AZURE_4: Log Analytics workspace is created but diagnostic settings are configured separately or via alternative monitoring solutions
kubernetes_version = var.kubernetes_version
role_based_access_control_enabled = var.role_based_access_control_enabled
automatic_upgrade_channel = var.automatic_upgrade_channel
Expand Down Expand Up @@ -274,29 +282,21 @@ resource "azurerm_kubernetes_cluster" "main" {
vertical_pod_autoscaler_enabled = var.workload_autoscaler_profile.vertical_pod_autoscaler_enabled
}

oidc_issuer_enabled = var.oidc_issuer_enabled
workload_identity_enabled = var.workload_identity_enabled

network_profile {
network_plugin = "azure"
network_policy = "cilium"
service_cidr = var.service_cidr
dns_service_ip = var.dns_service_ip
}

oms_agent {
log_analytics_workspace_id = azurerm_log_analytics_workspace.aks_logs.id
}

# Use system-assigned managed identity (Microsoft default and best practice)
# AKS automatically creates Contributor role on node RG for control plane
# AKS automatically creates Virtual Machine Contributor role on node RG for kubelet
identity {
type = length(var.user_assigned_identity_ids) > 0 ? "UserAssigned" : "SystemAssigned"
identity_ids = var.user_assigned_identity_ids
}

dynamic "kubelet_identity" {
for_each = var.kubelet_user_assigned_identity_id != null ? [1] : []
content {
client_id = var.kubelet_client_id
object_id = var.kubelet_object_id
user_assigned_identity_id = var.kubelet_user_assigned_identity_id
}
type = "SystemAssigned"
}

tags = merge({
Expand Down Expand Up @@ -330,6 +330,52 @@ resource "azurerm_kubernetes_cluster_node_pool" "autoscaled" {
}, local.tags)
}

# Custom role for the kubelet identity, limited to the AKS node resource group.
# AKS already assigns Virtual Machine Contributor on that resource group to the
# kubelet identity, but disk attachment needs permissions beyond that built-in
# role. This definition grants only the VMSS / managed-disk operations listed
# below, plus snapshot operations when var.enable_volume_snapshots is true.
resource "azurerm_role_definition" "aks_kubelet_vmss_disk_manager" {
  scope       = azurerm_kubernetes_cluster.main.node_resource_group_id
  name        = "AKS Kubelet VMSS Disk Manager - ${var.context_id}"
  description = "Minimal permissions for AKS kubelet identity to manage VMSS disk attachments"

  # The role can only be assigned at the same scope it is defined on.
  assignable_scopes = [
    azurerm_kubernetes_cluster.main.node_resource_group_id
  ]

  permissions {
    not_actions = []

    # Always-on actions first, followed by the optional snapshot actions.
    actions = concat(
      [
        # Read/update individual VMSS instances (disk attach/detach goes through the VM model)
        "Microsoft.Compute/virtualMachineScaleSets/virtualMachines/read",
        "Microsoft.Compute/virtualMachineScaleSets/virtualMachines/write",

        # Managed disk lifecycle and access
        "Microsoft.Compute/disks/read",
        "Microsoft.Compute/disks/write",
        "Microsoft.Compute/disks/delete",
        "Microsoft.Compute/disks/beginGetAccess/action",
        "Microsoft.Compute/disks/endGetAccess/action",

        # Location-scoped lookups (operation status, VM sizes)
        "Microsoft.Compute/locations/DiskOperations/read",
        "Microsoft.Compute/locations/vmSizes/read",
        "Microsoft.Compute/locations/operations/read"
      ],
      var.enable_volume_snapshots ? [
        # Snapshot lifecycle; omitted entirely when volume snapshots are disabled
        "Microsoft.Compute/snapshots/read",
        "Microsoft.Compute/snapshots/write",
        "Microsoft.Compute/snapshots/delete"
      ] : []
    )
  }
}

# Grants the custom VMSS disk-manager role to the cluster's kubelet identity,
# scoped to the AKS node resource group.
resource "azurerm_role_assignment" "kubelet_vmss_disk_manager" {
  scope              = azurerm_kubernetes_cluster.main.node_resource_group_id
  role_definition_id = azurerm_role_definition.aks_kubelet_vmss_disk_manager.role_definition_resource_id
  principal_id       = local.kubelet_object_id

  # The kubelet identity is provisioned together with the cluster, so it may not
  # have replicated through the directory yet when this assignment is created.
  # Skipping the AAD lookup avoids intermittent "principal does not exist" failures.
  skip_service_principal_aad_check = true
}

resource "local_file" "kube_config" {
content = azurerm_kubernetes_cluster.main.kube_config_raw
filename = local.kubeconfig_path
Expand Down
93 changes: 71 additions & 22 deletions terraform/cluster/azure-aks/test.tftest.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ run "minimal_configuration" {
condition = azurerm_kubernetes_cluster.main.identity[0].type == "SystemAssigned"
error_message = "Cluster should use system-assigned identity by default"
}

assert {
condition = azurerm_kubernetes_cluster.main.oidc_issuer_enabled == true
error_message = "OIDC issuer should be enabled by default"
}

assert {
condition = azurerm_kubernetes_cluster.main.workload_identity_enabled == true
error_message = "Workload Identity should be enabled by default"
}

assert {
condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/read")
error_message = "Snapshot permissions should be included when enable_volume_snapshots is true (default)"
}

assert {
condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/write")
error_message = "Snapshot write permissions should be included when enable_volume_snapshots is true (default)"
}
}

# Tests a full configuration with all optional variables explicitly set,
Expand All @@ -92,18 +112,13 @@ run "full_configuration" {
command = plan

variables {
context_id = "test"
name = "windsor-aks"
cluster_name = "test-cluster"
resource_group_name = "test-rg"
kubernetes_version = "1.32"
user_assigned_identity_ids = [
"/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-1",
"/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-2"
]
kubelet_client_id = "test-client-id"
kubelet_object_id = "test-object-id"
kubelet_user_assigned_identity_id = "/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-1"
context_id = "test"
name = "windsor-aks"
cluster_name = "test-cluster"
resource_group_name = "test-rg"
kubernetes_version = "1.32"
oidc_issuer_enabled = true
workload_identity_enabled = true
default_node_pool = {
name = "system"
vm_size = "Standard_D2s_v3"
Expand All @@ -130,6 +145,7 @@ run "full_configuration" {
private_cluster_enabled = false
azure_policy_enabled = true
local_account_disabled = false
enable_volume_snapshots = true
}

assert {
Expand Down Expand Up @@ -208,28 +224,28 @@ run "full_configuration" {
}

assert {
condition = azurerm_kubernetes_cluster.main.identity[0].type == "UserAssigned"
error_message = "Cluster should use user-assigned identity when IDs are provided"
condition = azurerm_kubernetes_cluster.main.identity[0].type == "SystemAssigned"
error_message = "Cluster should use system-assigned identity"
}

assert {
condition = length(azurerm_kubernetes_cluster.main.identity[0].identity_ids) == 2
error_message = "Cluster should have 2 user-assigned identity IDs"
condition = azurerm_kubernetes_cluster.main.oidc_issuer_enabled == true
error_message = "OIDC issuer should be enabled"
}

assert {
condition = azurerm_kubernetes_cluster.main.kubelet_identity[0].client_id == "test-client-id"
error_message = "Kubelet client ID should match input"
condition = azurerm_kubernetes_cluster.main.workload_identity_enabled == true
error_message = "Workload Identity should be enabled"
}

assert {
condition = azurerm_kubernetes_cluster.main.kubelet_identity[0].object_id == "test-object-id"
error_message = "Kubelet object ID should match input"
condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/read")
error_message = "Snapshot permissions should be included when enable_volume_snapshots is true"
}

assert {
condition = azurerm_kubernetes_cluster.main.kubelet_identity[0].user_assigned_identity_id == "/subscriptions/12345678-1234-9876-4563-123456789012/resourceGroups/example-resource-group/providers/Microsoft.ManagedIdentity/userAssignedIdentities/test-identity-1"
error_message = "Kubelet user-assigned identity ID should match input"
condition = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/write")
error_message = "Snapshot write permissions should be included when enable_volume_snapshots is true"
}
}

Expand Down Expand Up @@ -300,3 +316,36 @@ run "multiple_invalid_inputs" {
kubernetes_version = "v1.32"
}
}

# Plans the module with enable_volume_snapshots = false and checks the kubelet
# role definition: none of the three snapshot actions may appear in its action
# list, while the core disk actions must still be present.
run "volume_snapshots_disabled" {
  command = plan

  variables {
    context_id              = "test"
    name                    = "windsor-aks"
    kubernetes_version      = "1.32"
    enable_volume_snapshots = false
  }

  # Snapshot read must be absent
  assert {
    condition     = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/read") == false
    error_message = "Snapshot read permissions should not be included when enable_volume_snapshots is false"
  }

  # Snapshot write must be absent
  assert {
    condition     = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/write") == false
    error_message = "Snapshot write permissions should not be included when enable_volume_snapshots is false"
  }

  # Snapshot delete must be absent
  assert {
    condition     = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/snapshots/delete") == false
    error_message = "Snapshot delete permissions should not be included when enable_volume_snapshots is false"
  }

  # The always-on disk actions are unaffected by the flag
  assert {
    condition     = contains(azurerm_role_definition.aks_kubelet_vmss_disk_manager.permissions[0].actions, "Microsoft.Compute/disks/read")
    error_message = "Core disk permissions should still be included when enable_volume_snapshots is false"
  }
}
30 changes: 12 additions & 18 deletions terraform/cluster/azure-aks/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,6 @@ variable "expiration_date" {
default = null
}

variable "user_assigned_identity_ids" {
type = list(string)
description = "User assigned identity IDs for the AKS cluster. If provided, the cluster will use only user-assigned identities."
default = []
}

variable "soft_delete_retention_days" {
type = number
description = "The number of days to retain the AKS cluster's key vault"
Expand Down Expand Up @@ -241,20 +235,20 @@ variable "endpoint_private_access" {
default = false
}

variable "kubelet_client_id" {
description = "Client ID of the user-assigned identity to use for the kubelet. If not provided, the cluster will use the system-assigned identity."
type = string
default = null
variable "enable_volume_snapshots" {
description = "Enable volume snapshot permissions for the kubelet identity. Set to false to use minimal permissions if volume snapshots are not needed."
type = bool
default = true
}

variable "kubelet_object_id" {
description = "Object ID of the user-assigned identity to use for the kubelet. If not provided, the cluster will use the system-assigned identity."
type = string
default = null
variable "oidc_issuer_enabled" {
description = "Enable OIDC issuer for the AKS cluster"
type = bool
default = true
}

variable "kubelet_user_assigned_identity_id" {
description = "Resource ID of the user-assigned identity to use for the kubelet. If not provided, the cluster will use the system-assigned identity."
type = string
default = null
variable "workload_identity_enabled" {
description = "Enable Workload Identity for the AKS cluster"
type = bool
default = true
}
Loading