From 99fe75a6b05598c04a01f31dae843b83d0242037 Mon Sep 17 00:00:00 2001 From: Igor Dvorzhak Date: Sat, 16 Jul 2022 00:31:37 -0700 Subject: [PATCH 1/3] Explicitly request ephemeral storage to prevent evictions --- cloudbuild/run-presubmit-on-k8s.sh | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/cloudbuild/run-presubmit-on-k8s.sh b/cloudbuild/run-presubmit-on-k8s.sh index 5fd613e4a..5409c6cd6 100644 --- a/cloudbuild/run-presubmit-on-k8s.sh +++ b/cloudbuild/run-presubmit-on-k8s.sh @@ -10,15 +10,37 @@ readonly POD_NAME=presubmit-${DATAPROC_IMAGE_VERSION//./-}-${BUILD_ID//_/-} gcloud container clusters get-credentials "${CLOUDSDK_CONTAINER_CLUSTER}" +readonly OVERRIDES=" +{ + \"spec\": { + \"template\":{ + \"spec\": { + \"containers\": [{ + \"name\":\"${POD_NAME}\", + \"image\":\"${IMAGE}\", + \"resources\": { + \"requests\": { + \"cpu\": \"0.6\", + \"memory\": \"1.5Gi\", + \"ephemeral-storage\": \"2Gi\" + } + } + }] + } + } + } +}" + kubectl run "${POD_NAME}" \ --image="${IMAGE}" \ - --pod-running-timeout=15m \ + --pod-running-timeout=10m \ + --overrides="${OVERRIDES}" \ --restart=Never \ --env="COMMIT_SHA=${COMMIT_SHA}" \ --env="IMAGE_VERSION=${DATAPROC_IMAGE_VERSION}" \ --command -- bash /init-actions/cloudbuild/presubmit.sh -# Delete POD on exit and desribe it before deletion if exit was unsuccessful +# Delete POD on exit and describe it before deletion if exit was unsuccessful trap '[[ $? 
!= 0 ]] && kubectl describe "pod/${POD_NAME}"; kubectl delete pods "${POD_NAME}"' EXIT kubectl wait --for=condition=Ready "pod/${POD_NAME}" --timeout=600s From 1248090d2f14a38bb7b8163c458520cc3993153b Mon Sep 17 00:00:00 2001 From: Igor Dvorzhak Date: Sat, 16 Jul 2022 00:51:13 -0700 Subject: [PATCH 2/3] Use --requests flag --- cloudbuild/run-presubmit-on-k8s.sh | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/cloudbuild/run-presubmit-on-k8s.sh b/cloudbuild/run-presubmit-on-k8s.sh index 5409c6cd6..38f92f3e5 100644 --- a/cloudbuild/run-presubmit-on-k8s.sh +++ b/cloudbuild/run-presubmit-on-k8s.sh @@ -10,31 +10,10 @@ readonly POD_NAME=presubmit-${DATAPROC_IMAGE_VERSION//./-}-${BUILD_ID//_/-} gcloud container clusters get-credentials "${CLOUDSDK_CONTAINER_CLUSTER}" -readonly OVERRIDES=" -{ - \"spec\": { - \"template\":{ - \"spec\": { - \"containers\": [{ - \"name\":\"${POD_NAME}\", - \"image\":\"${IMAGE}\", - \"resources\": { - \"requests\": { - \"cpu\": \"0.6\", - \"memory\": \"1.5Gi\", - \"ephemeral-storage\": \"2Gi\" - } - } - }] - } - } - } -}" - kubectl run "${POD_NAME}" \ --image="${IMAGE}" \ --pod-running-timeout=10m \ - --overrides="${OVERRIDES}" \ + --requests='cpu=0.6,memory=1.5Gi,ephemeral-storage=2Gi' \ --restart=Never \ --env="COMMIT_SHA=${COMMIT_SHA}" \ --env="IMAGE_VERSION=${DATAPROC_IMAGE_VERSION}" \ From 7ae1974078585dc4fa6c055fac46ca06dfe9b33a Mon Sep 17 00:00:00 2001 From: Igor Dvorzhak Date: Sat, 16 Jul 2022 01:06:57 -0700 Subject: [PATCH 3/3] Adjust requested resources --- cloudbuild/run-presubmit-on-k8s.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cloudbuild/run-presubmit-on-k8s.sh b/cloudbuild/run-presubmit-on-k8s.sh index 38f92f3e5..fb97c1dfa 100644 --- a/cloudbuild/run-presubmit-on-k8s.sh +++ b/cloudbuild/run-presubmit-on-k8s.sh @@ -12,8 +12,8 @@ gcloud container clusters get-credentials "${CLOUDSDK_CONTAINER_CLUSTER}" kubectl run "${POD_NAME}" \ --image="${IMAGE}" \ - 
--pod-running-timeout=10m \ - --requests='cpu=0.6,memory=1.5Gi,ephemeral-storage=2Gi' \ + --pod-running-timeout=15m \ + --requests='cpu=750m,memory=2Gi,ephemeral-storage=2Gi' \ --restart=Never \ --env="COMMIT_SHA=${COMMIT_SHA}" \ --env="IMAGE_VERSION=${DATAPROC_IMAGE_VERSION}" \ @@ -22,15 +22,15 @@ kubectl run "${POD_NAME}" \ # Delete POD on exit and describe it before deletion if exit was unsuccessful trap '[[ $? != 0 ]] && kubectl describe "pod/${POD_NAME}"; kubectl delete pods "${POD_NAME}"' EXIT -kubectl wait --for=condition=Ready "pod/${POD_NAME}" --timeout=600s +kubectl wait --for=condition=Ready "pod/${POD_NAME}" --timeout=15m kubectl logs -f "${POD_NAME}" # Wait until POD will be terminated -wait_secs=200 +wait_secs=300 while ((wait_secs > 0)) && ! kubectl describe "pod/${POD_NAME}" | grep -q Terminated; do - sleep 5 - ((wait_secs-=5)) + sleep 10 + ((wait_secs-=10)) done readonly EXIT_CODE=$(kubectl get pod "${POD_NAME}" \