Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions scripts/aws/run_systemds_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fi

source systemds_cluster.config

aws s3 cp $1 s3://system-ds-bucket/ --exclude "*" --include "*.dml"
aws s3 cp $1 s3://${BUCKET} --exclude "*" --include "*.dml"

if [ ! -z "$2" ]
then
Expand All @@ -50,7 +50,7 @@ STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
--driver-memory,$SPARK_DRIVER_MEMORY,
--num-executors,$SPARK_NUM_EXECUTORS,
--conf,spark.driver.maxResultSize=0,
$SYSTEMDS_JAR_PATH, -f, s3://system-ds-bucket/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")
$SYSTEMDS_JAR_PATH, -f, s3://$BUCKET/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")

STEP_ID=$(echo $STEP_INFO | jq .StepIds | tr -d '"' | tr -d ']' | tr -d '[' | tr -d '[:space:]' )
echo "Waiting for the step to finish"
Expand Down
21 changes: 15 additions & 6 deletions scripts/aws/spinup_systemds_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,12 @@ set_config "SPARK_EXECUTOR_MEMORY" $SPARK_EXECUTOR_MEMORY
set_config "SPARK_DRIVER_MEMORY" "1G"
set_config "BUCKET" $BUCKET-$(((RANDOM % 999) + 1000))

#Create systemDS bucket
aws s3api create-bucket --bucket $BUCKET --region $REGION &> /dev/null
aws s3api create-bucket --bucket $BUCKET-logs --region $REGION &> /dev/null
#Source again to update the changes for the current session
source systemds_cluster.config

#Create systemDS bucket (a LocationConstraint configuration is required for regions outside of us-east-1)
aws s3api create-bucket --bucket $BUCKET --region $REGION --create-bucket-configuration LocationConstraint=$REGION &> /dev/null
aws s3api create-bucket --bucket $BUCKET-logs --region $REGION --create-bucket-configuration LocationConstraint=$REGION &> /dev/null

# Upload Jar and scripts to s3
aws s3 sync $SYSTEMDS_TARGET_DIRECTORY s3://$BUCKET --exclude "*" --include "*.dml" --include "*config.xml" --include "*DS.jar*"
Expand All @@ -60,11 +63,17 @@ if [ ! -f ${KEYPAIR_NAME}.pem ]; then
echo "${KEYPAIR_NAME}.pem private key created!"
fi

#Get the first available subnet in the default VPC of the configured region
DEFAULT_SUBNET=$(aws ec2 describe-subnets --region $REGION \
--filter "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text)

#Create the cluster
#Note: Ganglia is not available since emr-6.15.0; it has been replaced with AmazonCloudWatchAgent
CLUSTER_INFO=$(aws emr create-cluster \
--applications Name=Ganglia Name=Spark \
--applications Name=AmazonCloudWatchAgent Name=Spark \
--ec2-attributes '{"KeyName":"'${KEYPAIR_NAME}'",
"InstanceProfile":"EMR_EC2_DefaultRole"}'\
"InstanceProfile":"EMR_EC2_DefaultRole",
"SubnetId": "'${DEFAULT_SUBNET}'"}'\
--service-role EMR_DefaultRole \
--enable-debugging \
--release-label $EMR_VERSION \
Expand Down Expand Up @@ -104,6 +113,6 @@ echo "Cluster info:"
export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID | jq .Cluster.MasterPublicDnsName | tr -d '"')

aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION \
--command 'aws s3 cp s3://system-ds-bucket/target . --recursive --exclude "*" --include "*DS.jar*"'
--command 'aws s3 cp s3://'${BUCKET}' . --recursive --exclude "*" --include "*DS.jar*"'

echo "Spinup finished."
4 changes: 2 additions & 2 deletions scripts/aws/systemds_cluster.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

KEYPAIR_NAME="SystemDSkeynamex"
REGION="us-east-1"
BUCKET="systemds-bucket"
EMR_VERSION="emr-5.28.0"
BUCKET=systemds-bucket
EMR_VERSION="emr-7.0.0"

INSTANCES_TYPE="m5.xlarge"
MASTER_INSTANCES_COUNT=1
Expand Down