Skip to content
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/operator/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ type Options struct {
AdditionalTags map[string]string `json:"additionalTags,omitempty"`
EnableAzureSDKLogging bool `json:"enableAzureSDKLogging,omitempty"` // Controls whether Azure SDK middleware logging is enabled
DiskEncryptionSetID string `json:"diskEncryptionSetId,omitempty"`
ManageExistingAKSMachines bool `json:"manageExistingAKSMachines,omitempty"` // If set to true, existing AKS machines created with PROVISION_MODE=aksmachineapi will be managed even with other provision modes. This option does not have any effect if PROVISION_MODE=aksmachineapi, as it will behave as if this option is set to true.
}

func (o *Options) AddFlags(fs *coreoptions.FlagSet) {
Expand All @@ -117,6 +118,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) {
fs.StringVar(&o.SIGAccessTokenServerURL, "sig-access-token-server-url", env.WithDefaultString("SIG_ACCESS_TOKEN_SERVER_URL", ""), "The URL for the SIG access token server. Only used for AKS managed karpenter. UseSIG must be set tot true for this to take effect.")
fs.StringVar(&o.SIGSubscriptionID, "sig-subscription-id", env.WithDefaultString("SIG_SUBSCRIPTION_ID", ""), "The subscription ID of the shared image gallery.")
fs.StringVar(&o.DiskEncryptionSetID, "node-osdisk-diskencryptionset-id", env.WithDefaultString("NODE_OSDISK_DISKENCRYPTIONSET_ID", ""), "The ARM resource ID of the disk encryption set to use for customer-managed key (BYOK) encryption.")
fs.BoolVar(&o.ManageExistingAKSMachines, "manage-existing-aks-machines", env.WithDefaultBool("MANAGE_EXISTING_AKS_MACHINES", false), "If set to true, existing AKS machines created with PROVISION_MODE=aksmachineapi will be managed even with other provision modes. This option does not have any effect if PROVISION_MODE=aksmachineapi, as it will behave as if this option is set to true.")

additionalTagsFlag := k8sflag.NewMapStringString(&o.AdditionalTags)
if err := additionalTagsFlag.Set(env.WithDefaultString("ADDITIONAL_TAGS", "")); err != nil {
Expand Down
32 changes: 32 additions & 0 deletions pkg/operator/options/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ var _ = Describe("Options", func() {
"LINUX_ADMIN_USERNAME",
"ADDITIONAL_TAGS",
"ENABLE_AZURE_SDK_LOGGING",
"MANAGE_EXISTING_AKS_MACHINES",
}

var fs *coreoptions.FlagSet
Expand Down Expand Up @@ -123,6 +124,7 @@ var _ = Describe("Options", func() {
os.Setenv("KUBELET_IDENTITY_CLIENT_ID", "2345678-1234-1234-1234-123456789012")
os.Setenv("LINUX_ADMIN_USERNAME", "customadminusername")
os.Setenv("ADDITIONAL_TAGS", "test-tag=test-value")
os.Setenv("MANAGE_EXISTING_AKS_MACHINES", "true")
fs = &coreoptions.FlagSet{
FlagSet: flag.NewFlagSet("karpenter", flag.ContinueOnError),
}
Expand Down Expand Up @@ -152,6 +154,7 @@ var _ = Describe("Options", func() {
KubeletIdentityClientID: lo.ToPtr("2345678-1234-1234-1234-123456789012"),
AdditionalTags: map[string]string{"test-tag": "test-value"},
ClusterDNSServiceIP: lo.ToPtr("10.244.0.1"),
ManageExistingAKSMachines: lo.ToPtr(true),
})
Expect(opts).To(BeComparableTo(expectedOpts, cmpopts.IgnoreUnexported(options.Options{})))
})
Expand Down Expand Up @@ -491,6 +494,35 @@ var _ = Describe("Options", func() {
)
Expect(err).To(MatchError(ContainSubstring("validating options, additional-tags key \"<key1>\" contains invalid characters.")))
})

It("should default manage-existing-aks-machines to false", func() {
err := opts.Parse(
fs,
"--cluster-name", "my-name",
"--cluster-endpoint", "https://karpenter-000000000000.hcp.westus2.staging.azmk8s.io",
"--kubelet-bootstrap-token", "flag-bootstrap-token",
"--ssh-public-key", "flag-ssh-public-key",
"--vnet-subnet-id", "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub",
"--node-resource-group", "my-node-rg",
)
Expect(err).ToNot(HaveOccurred())
Expect(opts.ManageExistingAKSMachines).To(BeFalse())
})

It("should succeed with manage-existing-aks-machines set", func() {
err := opts.Parse(
fs,
"--cluster-name", "my-name",
"--cluster-endpoint", "https://karpenter-000000000000.hcp.westus2.staging.azmk8s.io",
"--kubelet-bootstrap-token", "flag-bootstrap-token",
"--ssh-public-key", "flag-ssh-public-key",
"--vnet-subnet-id", "/subscriptions/12345678-1234-1234-1234-123456789012/resourceGroups/sillygeese/providers/Microsoft.Network/virtualNetworks/karpentervnet/subnets/karpentersub",
"--node-resource-group", "my-node-rg",
"--manage-existing-aks-machines",
)
Expect(err).ToNot(HaveOccurred())
Expect(opts.ManageExistingAKSMachines).To(BeTrue())
})
})

Context("Admin Username Validation", func() {
Expand Down
36 changes: 36 additions & 0 deletions pkg/providers/instance/aksmachineinstanceutils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package instance

import (
"net/http"
"strings"

sdkerrors "github.com/Azure/azure-sdk-for-go-extensions/pkg/errors"
)

func IsAKSMachineOrMachinesPoolNotFound(err error) bool {
if err == nil {
return false
}
azErr := sdkerrors.IsResponseError(err)
if azErr != nil && (azErr.StatusCode == http.StatusNotFound || // Covers AKS machines pool not found on PUT machine, GET machine, GET (list) machines, POST agent pool (DELETE machines), and AKS machine not found on GET machine
(azErr.StatusCode == http.StatusBadRequest && azErr.ErrorCode == "InvalidParameter" && strings.Contains(azErr.Error(), "Cannot find any valid machines"))) { // Covers AKS machine not found on POST agent pool (DELETE machines)
return true
}
return false
}
105 changes: 105 additions & 0 deletions pkg/providers/instance/aksmachineinstanceutils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package instance

import (
"fmt"
"io"
"net/http"
"strings"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// createAzureResponseError creates a proper Azure SDK error with the given error code and message
func createAzureResponseError(errorCode, errorMessage string, statusCode int) error {
errorBody := fmt.Sprintf(`{"error": {"code": "%s", "message": "%s"}}`, errorCode, errorMessage)
return &azcore.ResponseError{
ErrorCode: errorCode,
StatusCode: statusCode,
RawResponse: &http.Response{
Body: io.NopCloser(strings.NewReader(errorBody)),
},
}
}

var _ = Describe("AKSMachineInstanceUtils Helper Functions", func() {

Context("IsAKSMachineOrMachinesPoolNotFound", func() {
It("should return false for nil error", func() {
result := IsAKSMachineOrMachinesPoolNotFound(nil)
Expect(result).To(BeFalse())
})

It("should return true for HTTP 404 status code", func() {
azureError := &azcore.ResponseError{
ErrorCode: "lol",
StatusCode: 404,
RawResponse: nil,
}

result := IsAKSMachineOrMachinesPoolNotFound(azureError)
Expect(result).To(BeTrue())
})

It("should return true for InvalidParameter error with 'Cannot find any valid machines' message", func() {
// Create the exact error message from your example
errorMessage := "Cannot find any valid machines to delete. Please check your input machine names. The valid machines to delete in agent pool 'testmpool' are: testmachine."
azureError := createAzureResponseError("InvalidParameter", errorMessage, 400)

result := IsAKSMachineOrMachinesPoolNotFound(azureError)
Expect(result).To(BeTrue())
})

It("should return false for HTTP 400 with InvalidParameter but different message", func() {
// Create an InvalidParameter error with a different message that shouldn't match
differentMessage := "InvalidParameter: Some other validation error"
azureError := createAzureResponseError("InvalidParameter", differentMessage, 400)

result := IsAKSMachineOrMachinesPoolNotFound(azureError)
Expect(result).To(BeFalse())
})

It("should return false for other HTTP status codes", func() {
azureError := &azcore.ResponseError{
ErrorCode: "Unauthorized",
StatusCode: 401,
RawResponse: nil,
}

result := IsAKSMachineOrMachinesPoolNotFound(azureError)
Expect(result).To(BeFalse())

azureError = &azcore.ResponseError{
ErrorCode: "InternalOperationError",
StatusCode: 500,
RawResponse: nil,
}

result = IsAKSMachineOrMachinesPoolNotFound(azureError)
Expect(result).To(BeFalse())
})

It("should return false for non-Azure SDK errors", func() {
result := IsAKSMachineOrMachinesPoolNotFound(fmt.Errorf("some generic error"))
Expect(result).To(BeFalse())
})
})
})
46 changes: 45 additions & 1 deletion pkg/providers/instance/azure_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v7"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/network/armnetwork"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resourcegraph/armresourcegraph"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/resources/armsubscriptions"
Expand All @@ -41,6 +42,17 @@ import (
armopts "github.com/Azure/karpenter-provider-azure/pkg/utils/opts"
)

type AKSMachinesAPI interface {
BeginCreateOrUpdate(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachineName string, parameters armcontainerservice.Machine, options *armcontainerservice.MachinesClientBeginCreateOrUpdateOptions) (*runtime.Poller[armcontainerservice.MachinesClientCreateOrUpdateResponse], error)
Get(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachineName string, options *armcontainerservice.MachinesClientGetOptions) (armcontainerservice.MachinesClientGetResponse, error)
NewListPager(resourceGroupName string, resourceName string, agentPoolName string, options *armcontainerservice.MachinesClientListOptions) *runtime.Pager[armcontainerservice.MachinesClientListResponse]
}

type AKSAgentPoolsAPI interface {
Get(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, options *armcontainerservice.AgentPoolsClientGetOptions) (armcontainerservice.AgentPoolsClientGetResponse, error)
BeginDeleteMachines(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachines armcontainerservice.AgentPoolDeleteMachinesParameter, options *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error)
}

type VirtualMachinesAPI interface {
BeginCreateOrUpdate(ctx context.Context, resourceGroupName string, vmName string, parameters armcompute.VirtualMachine, options *armcompute.VirtualMachinesClientBeginCreateOrUpdateOptions) (*runtime.Poller[armcompute.VirtualMachinesClientCreateOrUpdateResponse], error)
Get(ctx context.Context, resourceGroupName string, vmName string, options *armcompute.VirtualMachinesClientGetOptions) (armcompute.VirtualMachinesClientGetResponse, error)
Expand Down Expand Up @@ -72,6 +84,8 @@ type SubnetsAPI interface {
type AZClient struct {
azureResourceGraphClient AzureResourceGraphAPI
virtualMachinesClient VirtualMachinesAPI
aksMachinesClient AKSMachinesAPI
agentPoolsClient AKSAgentPoolsAPI
virtualMachinesExtensionClient VirtualMachineExtensionsAPI
networkInterfacesClient NetworkInterfacesAPI
subnetsClient SubnetsAPI
Expand All @@ -93,6 +107,8 @@ func (c *AZClient) SubnetsClient() SubnetsAPI {
func NewAZClientFromAPI(
virtualMachinesClient VirtualMachinesAPI,
azureResourceGraphClient AzureResourceGraphAPI,
aksMachinesClient AKSMachinesAPI,
agentPoolsClient AKSAgentPoolsAPI,
virtualMachinesExtensionClient VirtualMachineExtensionsAPI,
interfacesClient NetworkInterfacesAPI,
subnetsClient SubnetsAPI,
Expand All @@ -107,6 +123,8 @@ func NewAZClientFromAPI(
return &AZClient{
virtualMachinesClient: virtualMachinesClient,
azureResourceGraphClient: azureResourceGraphClient,
aksMachinesClient: aksMachinesClient,
agentPoolsClient: agentPoolsClient,
virtualMachinesExtensionClient: virtualMachinesExtensionClient,
networkInterfacesClient: interfacesClient,
subnetsClient: subnetsClient,
Expand Down Expand Up @@ -184,7 +202,9 @@ func NewAZClient(ctx context.Context, cfg *auth.Config, env *auth.Environment, c
// TODO Move this over to track 2 when skewer is migrated
skuClient := skuclient.NewSkuClient(cfg.SubscriptionID, cred, env.Cloud)

var nodeBootstrappingClient imagefamilytypes.NodeBootstrappingAPI = nil
var nodeBootstrappingClient imagefamilytypes.NodeBootstrappingAPI
var aksMachinesClient AKSMachinesAPI
var agentPoolsClient AKSAgentPoolsAPI
if o.ProvisionMode == consts.ProvisionModeBootstrappingClient {
nodeBootstrappingClient, err = imagefamily.NewNodeBootstrappingClient(
ctx,
Expand All @@ -199,10 +219,34 @@ func NewAZClient(ctx context.Context, cfg *auth.Config, env *auth.Environment, c
return nil, err
}
}
if o.ManageExistingAKSMachines {
aksMachinesClient, err = armcontainerservice.NewMachinesClient(cfg.SubscriptionID, cred, opts)
if err != nil {
return nil, err
}
agentPoolsClient, err = armcontainerservice.NewAgentPoolsClient(cfg.SubscriptionID, cred, opts)
if err != nil {
return nil, err
}
} else {
// Try create true clients
_, err = armcontainerservice.NewMachinesClient(cfg.SubscriptionID, cred, opts)
if err != nil {
log.FromContext(ctx).Info("failed to create true AKS machines client, but tolerated due to currently on dry client", "error", err)
}
_, err = armcontainerservice.NewAgentPoolsClient(cfg.SubscriptionID, cred, opts)
if err != nil {
log.FromContext(ctx).Info("failed to create true AKS agent pools client, but tolerated due to currently on dry client", "error", err)
}
aksMachinesClient = NewNoAKSMachinesClient()
agentPoolsClient = NewNoAKSAgentPoolsClient()
}

return NewAZClientFromAPI(
virtualMachinesClient,
azureResourceGraphClient,
aksMachinesClient,
agentPoolsClient,
extensionsClient,
interfacesClient,
subnetsClient,
Expand Down
74 changes: 74 additions & 0 deletions pkg/providers/instance/azure_dry_client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
Portions Copyright (c) Microsoft Corporation.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package instance

import (
"context"
"net/http"

"github.com/Azure/azure-sdk-for-go/sdk/azcore"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/runtime"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/containerservice/armcontainerservice/v8"
)

var agentPoolNotFoundRespError = &azcore.ResponseError{
ErrorCode: "NotFound",
StatusCode: http.StatusNotFound,
}

type noAKSMachinesClient struct{}

func NewNoAKSMachinesClient() AKSMachinesAPI {
return &noAKSMachinesClient{}
}

func (d *noAKSMachinesClient) BeginCreateOrUpdate(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachineName string, parameters armcontainerservice.Machine, options *armcontainerservice.MachinesClientBeginCreateOrUpdateOptions) (*runtime.Poller[armcontainerservice.MachinesClientCreateOrUpdateResponse], error) {
// As if agent pool is not found
return nil, agentPoolNotFoundRespError
}

func (d *noAKSMachinesClient) Get(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachineName string, options *armcontainerservice.MachinesClientGetOptions) (armcontainerservice.MachinesClientGetResponse, error) {
// As if agent pool is not found
return armcontainerservice.MachinesClientGetResponse{}, agentPoolNotFoundRespError
}

func (d *noAKSMachinesClient) NewListPager(resourceGroupName string, resourceName string, agentPoolName string, options *armcontainerservice.MachinesClientListOptions) *runtime.Pager[armcontainerservice.MachinesClientListResponse] {
// As if agent pool is not found
return runtime.NewPager(runtime.PagingHandler[armcontainerservice.MachinesClientListResponse]{
More: func(armcontainerservice.MachinesClientListResponse) bool { return false },
Fetcher: func(context.Context, *armcontainerservice.MachinesClientListResponse) (armcontainerservice.MachinesClientListResponse, error) {
return armcontainerservice.MachinesClientListResponse{}, agentPoolNotFoundRespError
},
})
}

type noAKSAgentPoolsClient struct{}

// NewNoAKSAgentPoolsClient creates a new dry AKS agent pools client, attempting to create real client internally
func NewNoAKSAgentPoolsClient() AKSAgentPoolsAPI {
return &noAKSAgentPoolsClient{}
}

func (d *noAKSAgentPoolsClient) Get(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, options *armcontainerservice.AgentPoolsClientGetOptions) (armcontainerservice.AgentPoolsClientGetResponse, error) {
// As if agent pool is not found
return armcontainerservice.AgentPoolsClientGetResponse{}, agentPoolNotFoundRespError
}

func (d *noAKSAgentPoolsClient) BeginDeleteMachines(ctx context.Context, resourceGroupName string, resourceName string, agentPoolName string, aksMachines armcontainerservice.AgentPoolDeleteMachinesParameter, options *armcontainerservice.AgentPoolsClientBeginDeleteMachinesOptions) (*runtime.Poller[armcontainerservice.AgentPoolsClientDeleteMachinesResponse], error) {
// As if agent pool is not found
return nil, agentPoolNotFoundRespError
}
Loading
Loading