Skip to content

Commit b150cef

Browse files
authored
Set node.cloudprovider.kubernetes.io/uninitialized taint on new nodes so cloud-node-manager processes them (#530)
1 parent 46bfe8d commit b150cef

File tree

1 file changed

+61
-5
lines changed

1 file changed

+61
-5
lines changed

capz/run-capz-e2e.sh

Lines changed: 61 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ set -o functrace
77

88
SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]}")
99
SCRIPT_ROOT=$(dirname "${SCRIPT_PATH}")
10+
export MANAGEMENT_KUBECONFIG="${SCRIPT_ROOT}/management.kubeconfig"
1011
export CAPZ_DIR="${CAPZ_DIR:-"${GOPATH}/src/sigs.k8s.io/cluster-api-provider-azure"}"
1112
: "${CAPZ_DIR:?Environment variable empty or not defined.}"
1213
if [[ ! -d $CAPZ_DIR ]]; then
@@ -61,6 +62,8 @@ main() {
6162
apply_workload_configuration
6263
apply_cloud_provider_azure
6364
wait_for_nodes
65+
ensure_cloud_provider_taint_on_windows_nodes
66+
wait_for_windows_machinedeployment
6467
if [[ "${HYPERV}" == "true" ]]; then apply_hyperv_configuration; fi
6568
run_e2e_test
6669
}
@@ -113,6 +116,9 @@ run_capz_e2e_cleanup() {
113116

114117
# currently KUBECONFIG is set to the workload cluster so reset to the management cluster
115118
unset KUBECONFIG
119+
if [[ -f "${MANAGEMENT_KUBECONFIG}" ]]; then
120+
export KUBECONFIG="${MANAGEMENT_KUBECONFIG}"
121+
fi
116122

117123
SKIP_LOG_COLLECTION="${SKIP_LOG_COLLECTION:-"false"}"
118124
if [[ ! "$SKIP_LOG_COLLECTION" == "true" ]]; then
@@ -219,7 +225,8 @@ create_cluster(){
219225
export AZURE_LOCATION
220226
fi
221227

222-
az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --overwrite-existing
228+
az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" -f "${MANAGEMENT_KUBECONFIG}" --overwrite-existing
229+
export KUBECONFIG="${MANAGEMENT_KUBECONFIG}"
223230

224231
# some scenarios require knowing the vnet configuration of the management cluster in order to work in a restricted networking environment
225232
aks_infra_rg_name=$(az aks show -g "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" --query nodeResourceGroup --output tsv)
@@ -286,13 +293,60 @@ create_cluster(){
286293

287294
# set the kube config to the workload cluster
288295
# the kubeconfig is dropped to the current folder but move it to a location that is well known to avoid issues if end up in wrong folder due to other scripts.
296+
local workload_kubeconfig_path="$PWD/${CLUSTER_NAME}.kubeconfig"
289297
if [[ "$PWD" != "$SCRIPT_ROOT" ]]; then
290-
mv "$PWD"/"${CLUSTER_NAME}".kubeconfig "$SCRIPT_ROOT"/"${CLUSTER_NAME}".kubeconfig
298+
cp "$workload_kubeconfig_path" "$SCRIPT_ROOT/${CLUSTER_NAME}.kubeconfig"
299+
workload_kubeconfig_path="$SCRIPT_ROOT/${CLUSTER_NAME}.kubeconfig"
291300
fi
292-
export KUBECONFIG="$SCRIPT_ROOT"/"${CLUSTER_NAME}".kubeconfig
301+
export KUBECONFIG="$workload_kubeconfig_path"
302+
303+
log "create_cluster complete"
304+
}
305+
306+
#######################################
# Wait for the Windows MachineDeployment ("${CLUSTER_NAME}-md-win", namespace
# "default") to exist on the management cluster and then to report the
# Available condition.
# Globals:   CLUSTER_NAME (read), MANAGEMENT_KUBECONFIG (read)
# Arguments: none
# Returns:   0 if the management kubeconfig file is missing (wait is skipped);
#            otherwise the exit status of the final `kubectl wait`.
#######################################
wait_for_windows_machinedeployment() {
  local md_name="${CLUSTER_NAME}-md-win"
  local kubeconfig="${MANAGEMENT_KUBECONFIG}"

  log "entering wait_for_windows_machinedeployment for ${md_name}"

  if [[ ! -f "${kubeconfig}" ]]; then
    log "management kubeconfig ${kubeconfig} not found; skipping MachineDeployment wait"
    return 0
  fi

  log "waiting for MachineDeployment ${md_name} to exist on management cluster"
  # The kubeconfig path and MachineDeployment name are handed to the child
  # shell as positional parameters ($1, $2) instead of being spliced into the
  # script text, so characters such as spaces, quotes or '$' in the path are
  # passed through literally rather than re-parsed by the child shell.
  timeout --foreground 900 bash -c \
    'until kubectl --kubeconfig "$1" get machinedeployment "$2" -n default > /dev/null 2>&1; do sleep 5; done' \
    _ "${kubeconfig}" "${md_name}"

  log "waiting for MachineDeployment ${md_name} to become Available"
  kubectl --kubeconfig "${kubeconfig}" wait --for=condition=Available --timeout=20m "machinedeployment/${md_name}" -n default
}
323+
324+
#######################################
# Apply the node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule
# taint (with --overwrite) to every node labelled kubernetes.io/os=windows.
# kubectl is invoked without an explicit --kubeconfig, so it targets whatever
# cluster the ambient kubectl configuration selects at call time.
# Globals:   none read directly
# Arguments: none
# Outputs:   progress messages via log
#######################################
ensure_cloud_provider_taint_on_windows_nodes() {
  log "tainting Windows nodes with node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule"

  # A failed lookup is treated the same as "no Windows nodes": stderr is
  # discarded and the non-zero status is swallowed.
  local node_refs
  node_refs=$(kubectl get nodes -l kubernetes.io/os=windows -o name 2>/dev/null || true)

  if [[ -n "${node_refs}" ]]; then
    # One "node/<name>" reference per line; strip the "node/" prefix before tainting.
    local ref
    while read -r ref; do
      if [[ -n "${ref}" ]]; then
        log "tainting node ${ref#node/}"
        kubectl taint nodes "${ref#node/}" node.cloudprovider.kubernetes.io/uninitialized=true:NoSchedule --overwrite
      fi
    done <<< "${node_refs}"
  else
    log "no Windows nodes found to taint"
    return
  fi

  # Report the number of lines returned by the node lookup.
  local tainted_total
  tainted_total=$(printf '%s\n' "${node_refs}" | wc -l)
  log "tainted ${tainted_total} Windows node(s)"
}
294347

295348
apply_workload_configuration(){
349+
log "entering apply_workload_configuration"
296350
log "wait for cluster to stabilize"
297351
timeout --foreground 300 bash -c "until kubectl get --raw /version --request-timeout 5s > /dev/null 2>&1; do sleep 3; done"
298352

@@ -343,6 +397,7 @@ EOF
343397
}
344398

345399
apply_cloud_provider_azure() {
400+
log "entering apply_cloud_provider_azure"
346401
echo "KUBERNETES_VERSION = ${KUBERNETES_VERSION}"
347402

348403
echo "Building cloud provider images"
@@ -484,15 +539,16 @@ run_e2e_test() {
484539

485540
wait_for_nodes() {
486541

542+
log "entering wait_for_nodes"
487543

488544
log "Waiting for ${CONTROL_PLANE_MACHINE_COUNT} control plane machine(s) and ${WINDOWS_WORKER_MACHINE_COUNT} windows machine(s) to become Ready"
489545
kubectl get nodes -o wide
490546
kubectl get pods -A -o wide
491547

492548
# switch KUBECONFIG to point to management cluster so we can check for provisioning status on
493549
# if any of the machines are in a failed state
494-
az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" -f management.kubeconfig --overwrite-existing
495-
export KUBECONFIG=./management.kubeconfig
550+
az aks get-credentials --resource-group "${CLUSTER_NAME}" --name "${CLUSTER_NAME}" -f "${MANAGEMENT_KUBECONFIG}" --overwrite-existing
551+
export KUBECONFIG="${MANAGEMENT_KUBECONFIG}"
496552

497553
kubectl get AzureMachines --all-namespaces
498554
# Ensure that all nodes are registered with the API server before checking for readiness

0 commit comments

Comments
 (0)