From 6d7b2fd7c23c6d9ab035f739195198e50c8ffec9 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 11 Dec 2018 18:26:48 -0800 Subject: [PATCH 1/5] Speed up running the kubernetes integration tests locally by allowing folks to skip the tgz dist build and extraction --- .../scripts/setup-integration-test-env.sh | 12 ++++++------ .../deploy/k8s/integrationtest/KubernetesSuite.scala | 12 ++++++++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index 36e30d7b2cffb..e70101bbc08cf 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -58,15 +58,15 @@ while (( "$#" )); do shift done -if [[ $SPARK_TGZ == "N/A" ]]; +rm -rf $UNPACKED_SPARK_TGZ +if [[ $SPARK_TGZ == "N/A" && $IMAGE_TAG == "N/A" ]]; then - echo "Must specify a Spark tarball to build Docker images against with --spark-tgz." && exit 1; + echo "Must specify a Spark tarball to build Docker images against with --spark-tgz OR image with --image-tag." && exit 1; +else + mkdir -p $UNPACKED_SPARK_TGZ + tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ; fi -rm -rf $UNPACKED_SPARK_TGZ -mkdir -p $UNPACKED_SPARK_TGZ -tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ; - if [[ $IMAGE_TAG == "N/A" ]]; then IMAGE_TAG=$(uuidgen); diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index f8f4b4177f3bd..da73b274fa7e4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -103,8 +103,16 @@ class KubernetesSuite extends SparkFunSuite System.clearProperty(key) } - val sparkDirProp = System.getProperty(CONFIG_KEY_UNPACK_DIR) - require(sparkDirProp != null, "Spark home directory must be provided in system properties.") + val possible_spark_dirs = List( + // If someone specified the tgz for the tests look at the extraction dir + System.getProperty(CONFIG_KEY_UNPACK_DIR), + // If otherwise use my working dir + 3 up + new File(Paths.get(System.getProperty("user.dir")).toFile, ("../" * 3)).getAbsolutePath() + ) + val sparkDirProp = possible_spark_dirs.filter(x => + new File(Paths.get(x).toFile, "bin/spark-submit").exists).headOption.getOrElse(null) + require(sparkDirProp != null, + s"Spark home directory must be provided in system properties tested $possible_spark_dirs") sparkHomeDir = Paths.get(sparkDirProp) require(sparkHomeDir.toFile.isDirectory, s"No directory found for spark home specified at $sparkHomeDir.") From af7e2363ede5ecd2329b3db8537186d1e7a890db Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 7 Mar 2019 12:20:31 -0800 Subject: [PATCH 2/5] Use the spark.test.home property suggested by vanzin --- .../spark/deploy/k8s/integrationtest/KubernetesSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index da73b274fa7e4..16b8714f10ac3 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -106,8 +106,8 @@ class KubernetesSuite extends SparkFunSuite val possible_spark_dirs = List( // If someone specified the tgz for the tests look at the extraction dir System.getProperty(CONFIG_KEY_UNPACK_DIR), - // If otherwise use my working dir + 3 up - new File(Paths.get(System.getProperty("user.dir")).toFile, ("../" * 3)).getAbsolutePath() + // Try the spark test home + sys.props("spark.test.home") ) val sparkDirProp = possible_spark_dirs.filter(x => new File(Paths.get(x).toFile, "bin/spark-submit").exists).headOption.getOrElse(null) From c131a7a69877a83b89da2a82ac86b45803a76890 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 7 Mar 2019 14:06:55 -0800 Subject: [PATCH 3/5] Run from spark locally and build image if no image tag or tgz is specified. --- .../scripts/setup-integration-test-env.sh | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index e70101bbc08cf..f992c6a9aa8e7 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +set -ex TEST_ROOT_DIR=$(git rev-parse --show-toplevel) UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" @@ -58,50 +59,59 @@ while (( "$#" )); do shift done -rm -rf $UNPACKED_SPARK_TGZ +rm -rf "$UNPACKED_SPARK_TGZ" if [[ $SPARK_TGZ == "N/A" && $IMAGE_TAG == "N/A" ]]; then - echo "Must specify a Spark tarball to build Docker images against with --spark-tgz OR image with --image-tag." && exit 1; -else + # If there is no spark image tag to test with and no src dir, build from current + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + SPARK_INPUT_DIR="$(cd "$SCRIPT_DIR/"../../../../ >/dev/null 2>&1 && pwd )" + DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/resource-managers/kubernetes/docker/src/main/dockerfiles/spark" +elif [[ $IMAGE_TAG == "N/A" ]]; +then + # If there is a test src tarball and no image tag we will want to build from that mkdir -p $UNPACKED_SPARK_TGZ tar -xzvf $SPARK_TGZ --strip-components=1 -C $UNPACKED_SPARK_TGZ; + SPARK_INPUT_DIR="$UNPACKED_SPARK_TGZ" + DOCKER_FILE_BASE_PATH="$SPARK_INPUT_DIR/kubernetes/dockerfiles/spark" fi + +# If there is a specific Spark image skip building and extraction/copy if [[ $IMAGE_TAG == "N/A" ]]; then IMAGE_TAG=$(uuidgen); - cd $UNPACKED_SPARK_TGZ + cd $SPARK_INPUT_DIR # Build PySpark image - LANGUAGE_BINDING_BUILD_ARGS="-p $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/python/Dockerfile" + LANGUAGE_BINDING_BUILD_ARGS="-p $DOCKER_FILE_BASE_PATH/bindings/python/Dockerfile" # Build SparkR image - LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/R/Dockerfile" + LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $DOCKER_FILE_BASE_PATH/bindings/R/Dockerfile" case $DEPLOY_MODE in cloud) # Build images - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build + $SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build # Push images appropriately if [[ $IMAGE_REPO == gcr.io* ]] ; then gcloud docker -- push $IMAGE_REPO/spark:$IMAGE_TAG else - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push + $SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG push fi ;; docker-for-desktop) # Only need to build as this will place it in our local Docker repo which is all # we need for Docker for Desktop to work so no need to also push - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build + $SPARK_INPUT_DIR/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build ;; minikube) # Only need to build and if we do this with the -m option for minikube we will # build the images directly using the minikube Docker daemon so no need to push - $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build + $SPARK_INPUT_DIR/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build ;; *) echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1 From 78f6ee42c344ed27d6a6c1e15664bfac3eeff3a0 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 7 Mar 2019 14:31:17 -0800 Subject: [PATCH 4/5] I _think_ we are currently depending on soft failure because of R packaging issues in Jenkins, this is scary --- .../integration-tests/scripts/setup-integration-test-env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index f992c6a9aa8e7..ab0123c2c5f08 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -set -ex +set -x TEST_ROOT_DIR=$(git rev-parse --show-toplevel) UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" From fbb54713c84f5f2fba523a8040f0b555d7b51cf7 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 13 Mar 2019 15:55:32 -0700 Subject: [PATCH 5/5] Turn -e mode on and also disable trrying to build the R image since it fails, make a note that it will need to be fixed inSPARK-25152 --- .../integration-tests/scripts/setup-integration-test-env.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh index ab0123c2c5f08..84c42cb19ef5c 100755 --- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh +++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -set -x +set -ex TEST_ROOT_DIR=$(git rev-parse --show-toplevel) UNPACKED_SPARK_TGZ="$TEST_ROOT_DIR/target/spark-dist-unpacked" IMAGE_TAG_OUTPUT_FILE="$TEST_ROOT_DIR/target/image-tag.txt" @@ -85,8 +85,8 @@ then # Build PySpark image LANGUAGE_BINDING_BUILD_ARGS="-p $DOCKER_FILE_BASE_PATH/bindings/python/Dockerfile" - # Build SparkR image - LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $DOCKER_FILE_BASE_PATH/bindings/R/Dockerfile" + # Build SparkR image -- disabled since this fails, re-enable as part of SPARK-25152 + # LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $DOCKER_FILE_BASE_PATH/bindings/R/Dockerfile" case $DEPLOY_MODE in cloud)