From 93a2471d65d3d1e0ab9333266c41ad119788da94 Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Wed, 19 Mar 2014 18:15:14 -0700 Subject: [PATCH 1/3] SPARK-1286: Make usage of spark-env.sh idempotent Various spark scripts load spark-env.sh. This can cause growth of any variables that may be appended to (SPARK_CLASSPATH, SPARK_REPL_OPTS) and it makes the precedence order for options specified in spark-env.sh less clear. One use-case for the latter is that we want to set options from the command-line of spark-shell, but these options will be overridden by subsequent loading of spark-env.sh. If we were to load the spark-env.sh first and then set our command-line options, we could guarantee correct precedence order. --- bin/compute-classpath.sh | 5 +---- bin/load-spark-env | 35 +++++++++++++++++++++++++++++++++++ bin/pyspark | 5 +---- bin/run-example | 5 +---- bin/spark-class | 5 +---- bin/spark-shell | 4 +--- sbin/slaves.sh | 4 +--- sbin/spark-daemon.sh | 4 +--- sbin/start-master.sh | 4 +--- sbin/start-slaves.sh | 4 +--- sbin/stop-slaves.sh | 4 +--- 11 files changed, 45 insertions(+), 34 deletions(-) create mode 100644 bin/load-spark-env diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 278969655de48..d2a65dc8b2331 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -25,10 +25,7 @@ SCALA_VERSION=2.10 # Figure out where Spark is installed FWDIR="$(cd `dirname $0`/..; pwd)" -# Load environment variables from conf/spark-env.sh, if it exists -if [ -e "$FWDIR/conf/spark-env.sh" ] ; then - . $FWDIR/conf/spark-env.sh -fi +. $FWDIR/bin/load-spark-env # Build up classpath CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf" diff --git a/bin/load-spark-env b/bin/load-spark-env new file mode 100644 index 0000000000000..6ea75e6f8adb1 --- /dev/null +++ b/bin/load-spark-env @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script loads spark-env.sh if it exists, and ensures it is only loaded once. +# spark-env.sh is loaded from SPARK_CONF_DIR if set, or otherwise from the conf/ subdirectory +# of the parent of the directory containing the script being run ($0). + +if [ -z "$SPARK_ENV_LOADED" ]; then + export SPARK_ENV_LOADED=1 + + # Returns the parent of the directory this script lives in. + FWDIR="$(cd `dirname $0`/..; pwd)" + + SPARK_CONF_DIR=${SPARK_CONF_DIR:-"$FWDIR/conf"} + + if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then + . "${SPARK_CONF_DIR}/spark-env.sh" + fi +fi diff --git a/bin/pyspark b/bin/pyspark index ed6f8da73035a..30eee05b91486 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -36,10 +36,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then fi fi -# Load environment variables from conf/spark-env.sh, if it exists -if [ -e "$FWDIR/conf/spark-env.sh" ] ; then - . $FWDIR/conf/spark-env.sh -fi +. $FWDIR/bin/load-spark-env # Figure out which Python executable to use if [ -z "$PYSPARK_PYTHON" ] ; then diff --git a/bin/run-example b/bin/run-example index adba7dd97aaf8..3f0785de067d3 100755 --- a/bin/run-example +++ b/bin/run-example @@ -30,10 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -# Load environment variables from conf/spark-env.sh, if it exists -if [ -e "$FWDIR/conf/spark-env.sh" ] ; then - . 
$FWDIR/conf/spark-env.sh -fi +. $FWDIR/bin/load-spark-env if [ -z "$1" ]; then echo "Usage: run-example []" >&2 diff --git a/bin/spark-class b/bin/spark-class index 229ae2cebbab3..884257b6b3637 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -30,10 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -# Load environment variables from conf/spark-env.sh, if it exists -if [ -e "$FWDIR/conf/spark-env.sh" ] ; then - . $FWDIR/conf/spark-env.sh -fi +. $FWDIR/bin/load-spark-env if [ -z "$1" ]; then echo "Usage: spark-class []" >&2 diff --git a/bin/spark-shell b/bin/spark-shell index 7d3fe3aca7f1d..0f22c7e395b17 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -81,9 +81,7 @@ done # Set MASTER from spark-env if possible DEFAULT_SPARK_MASTER_PORT=7077 if [ -z "$MASTER" ]; then - if [ -e "$FWDIR/conf/spark-env.sh" ]; then - . "$FWDIR/conf/spark-env.sh" - fi + . $FWDIR/bin/load-spark-env if [ "x" != "x$SPARK_MASTER_IP" ]; then if [ "y" != "y$SPARK_MASTER_PORT" ]; then SPARK_MASTER_PORT="${SPARK_MASTER_PORT}" diff --git a/sbin/slaves.sh b/sbin/slaves.sh index a5bc2183d87f3..037a74ed5cec1 100755 --- a/sbin/slaves.sh +++ b/sbin/slaves.sh @@ -63,9 +63,7 @@ then shift fi -if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then - . "${SPARK_CONF_DIR}/spark-env.sh" -fi +. "$SPARK_PREFIX/bin/load-spark-env" if [ "$HOSTLIST" = "" ]; then if [ "$SPARK_SLAVES" = "" ]; then diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh index 2be2b3d7c0933..a0ea714f1296e 100755 --- a/sbin/spark-daemon.sh +++ b/sbin/spark-daemon.sh @@ -86,9 +86,7 @@ spark_rotate_log () fi } -if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then - . "${SPARK_CONF_DIR}/spark-env.sh" -fi +. "$SPARK_PREFIX/bin/load-spark-env" if [ "$SPARK_IDENT_STRING" = "" ]; then export SPARK_IDENT_STRING="$USER" diff --git a/sbin/start-master.sh b/sbin/start-master.sh index 03a3428aea9f1..9f422b27f9824 100755 --- a/sbin/start-master.sh +++ b/sbin/start-master.sh @@ -39,9 +39,7 @@ done . 
"$sbin/spark-config.sh" -if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then - . "${SPARK_CONF_DIR}/spark-env.sh" -fi +. "$SPARK_PREFIX/bin/load-spark-env" if [ "$SPARK_MASTER_PORT" = "" ]; then SPARK_MASTER_PORT=7077 diff --git a/sbin/start-slaves.sh b/sbin/start-slaves.sh index da641cfe3c6fa..21d57cd8b2097 100755 --- a/sbin/start-slaves.sh +++ b/sbin/start-slaves.sh @@ -38,9 +38,7 @@ done . "$sbin/spark-config.sh" -if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then - . "${SPARK_CONF_DIR}/spark-env.sh" -fi +. "$SPARK_PREFIX/bin/load-spark-env" # Find the port number for the master if [ "$SPARK_MASTER_PORT" = "" ]; then diff --git a/sbin/stop-slaves.sh b/sbin/stop-slaves.sh index 6bf393ccd4b09..50aa61885a877 100755 --- a/sbin/stop-slaves.sh +++ b/sbin/stop-slaves.sh @@ -22,9 +22,7 @@ sbin=`cd "$sbin"; pwd` . "$sbin/spark-config.sh" -if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then - . "${SPARK_CONF_DIR}/spark-env.sh" -fi +. "$SPARK_PREFIX/bin/load-spark-env" # do before the below calls as they exec if [ -e "$sbin"/../tachyon/bin/tachyon ]; then From 8da836067cc97978d405f8ea609a1c7b894a4f06 Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Wed, 19 Mar 2014 23:01:43 -0700 Subject: [PATCH 2/3] Add .sh extension to load-spark-env.sh --- bin/compute-classpath.sh | 2 +- bin/{load-spark-env => load-spark-env.sh} | 0 bin/pyspark | 2 +- bin/run-example | 2 +- bin/spark-class | 2 +- bin/spark-shell | 2 +- sbin/slaves.sh | 2 +- sbin/spark-daemon.sh | 2 +- sbin/start-master.sh | 2 +- sbin/start-slaves.sh | 2 +- sbin/stop-slaves.sh | 2 +- 11 files changed, 10 insertions(+), 10 deletions(-) rename bin/{load-spark-env => load-spark-env.sh} (100%) diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index d2a65dc8b2331..8b44da2010585 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -25,7 +25,7 @@ SCALA_VERSION=2.10 # Figure out where Spark is installed FWDIR="$(cd `dirname $0`/..; pwd)" -. $FWDIR/bin/load-spark-env +. 
$FWDIR/bin/load-spark-env.sh # Build up classpath CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf" diff --git a/bin/load-spark-env b/bin/load-spark-env.sh similarity index 100% rename from bin/load-spark-env rename to bin/load-spark-env.sh diff --git a/bin/pyspark b/bin/pyspark index 30eee05b91486..67e1f61eeb1e5 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -36,7 +36,7 @@ if [ ! -f "$FWDIR/RELEASE" ]; then fi fi -. $FWDIR/bin/load-spark-env +. $FWDIR/bin/load-spark-env.sh # Figure out which Python executable to use if [ -z "$PYSPARK_PYTHON" ] ; then diff --git a/bin/run-example b/bin/run-example index 3f0785de067d3..5af95a08c6c41 100755 --- a/bin/run-example +++ b/bin/run-example @@ -30,7 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -. $FWDIR/bin/load-spark-env +. $FWDIR/bin/load-spark-env.sh if [ -z "$1" ]; then echo "Usage: run-example []" >&2 diff --git a/bin/spark-class b/bin/spark-class index 884257b6b3637..b872619723961 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -30,7 +30,7 @@ FWDIR="$(cd `dirname $0`/..; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -. $FWDIR/bin/load-spark-env +. $FWDIR/bin/load-spark-env.sh if [ -z "$1" ]; then echo "Usage: spark-class []" >&2 diff --git a/bin/spark-shell b/bin/spark-shell index 0f22c7e395b17..861ab606540cd 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -81,7 +81,7 @@ done # Set MASTER from spark-env if possible DEFAULT_SPARK_MASTER_PORT=7077 if [ -z "$MASTER" ]; then - . $FWDIR/bin/load-spark-env + . $FWDIR/bin/load-spark-env.sh if [ "x" != "x$SPARK_MASTER_IP" ]; then if [ "y" != "y$SPARK_MASTER_PORT" ]; then SPARK_MASTER_PORT="${SPARK_MASTER_PORT}" diff --git a/sbin/slaves.sh b/sbin/slaves.sh index 037a74ed5cec1..f89547fef9e46 100755 --- a/sbin/slaves.sh +++ b/sbin/slaves.sh @@ -63,7 +63,7 @@ then shift fi -. "$SPARK_PREFIX/bin/load-spark-env" +. 
"$SPARK_PREFIX/bin/load-spark-env.sh" if [ "$HOSTLIST" = "" ]; then if [ "$SPARK_SLAVES" = "" ]; then diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh index a0ea714f1296e..323f675b17848 100755 --- a/sbin/spark-daemon.sh +++ b/sbin/spark-daemon.sh @@ -86,7 +86,7 @@ spark_rotate_log () fi } -. "$SPARK_PREFIX/bin/load-spark-env" +. "$SPARK_PREFIX/bin/load-spark-env.sh" if [ "$SPARK_IDENT_STRING" = "" ]; then export SPARK_IDENT_STRING="$USER" diff --git a/sbin/start-master.sh b/sbin/start-master.sh index 9f422b27f9824..c5c02491f78e1 100755 --- a/sbin/start-master.sh +++ b/sbin/start-master.sh @@ -39,7 +39,7 @@ done . "$sbin/spark-config.sh" -. "$SPARK_PREFIX/bin/load-spark-env" +. "$SPARK_PREFIX/bin/load-spark-env.sh" if [ "$SPARK_MASTER_PORT" = "" ]; then SPARK_MASTER_PORT=7077 diff --git a/sbin/start-slaves.sh b/sbin/start-slaves.sh index 21d57cd8b2097..4912d0c0c7dfd 100755 --- a/sbin/start-slaves.sh +++ b/sbin/start-slaves.sh @@ -38,7 +38,7 @@ done . "$sbin/spark-config.sh" -. "$SPARK_PREFIX/bin/load-spark-env" +. "$SPARK_PREFIX/bin/load-spark-env.sh" # Find the port number for the master if [ "$SPARK_MASTER_PORT" = "" ]; then diff --git a/sbin/stop-slaves.sh b/sbin/stop-slaves.sh index 50aa61885a877..7c2201100ef97 100755 --- a/sbin/stop-slaves.sh +++ b/sbin/stop-slaves.sh @@ -22,7 +22,7 @@ sbin=`cd "$sbin"; pwd` . "$sbin/spark-config.sh" -. "$SPARK_PREFIX/bin/load-spark-env" +. 
"$SPARK_PREFIX/bin/load-spark-env.sh" # do before the below calls as they exec if [ -e "$sbin"/../tachyon/bin/tachyon ]; then From e291f91a5d9d84679048134a33f171264832b24b Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Sun, 23 Mar 2014 10:07:20 -0700 Subject: [PATCH 3/3] Use "private" variables in load-spark-env.sh --- bin/load-spark-env.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh index 6ea75e6f8adb1..476dd826551fd 100644 --- a/bin/load-spark-env.sh +++ b/bin/load-spark-env.sh @@ -25,11 +25,11 @@ if [ -z "$SPARK_ENV_LOADED" ]; then export SPARK_ENV_LOADED=1 # Returns the parent of the directory this script lives in. - FWDIR="$(cd `dirname $0`/..; pwd)" + parent_dir="$(cd `dirname $0`/..; pwd)" - SPARK_CONF_DIR=${SPARK_CONF_DIR:-"$FWDIR/conf"} + use_conf_dir=${SPARK_CONF_DIR:-"$parent_dir/conf"} - if [ -f "${SPARK_CONF_DIR}/spark-env.sh" ]; then - . "${SPARK_CONF_DIR}/spark-env.sh" + if [ -f "${use_conf_dir}/spark-env.sh" ]; then + . "${use_conf_dir}/spark-env.sh" fi fi