Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions scripts/aws/run_systemds_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fi

source systemds_cluster.config

aws s3 cp $1 s3://system-ds-bucket/ --exclude "*" --include "*.dml"
aws s3 cp $1 s3://${BUCKET} --exclude "*" --include "*.dml"

if [ ! -z "$2" ]
then
Expand All @@ -41,7 +41,7 @@ fi

dml_filename=$(basename $1)

STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --region $REGION --steps "Type=Spark,
Name='SystemDS Spark Program',
ActionOnFailure=CONTINUE,
Args=[
Expand All @@ -50,11 +50,11 @@ STEP_INFO=$(aws emr add-steps --cluster-id $CLUSTER_ID --steps "Type=Spark,
--driver-memory,$SPARK_DRIVER_MEMORY,
--num-executors,$SPARK_NUM_EXECUTORS,
--conf,spark.driver.maxResultSize=0,
$SYSTEMDS_JAR_PATH, -f, s3://system-ds-bucket/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")
$SYSTEMDS_JAR_PATH, -f, s3://$BUCKET/$dml_filename, -exec, $SYSTEMDS_EXEC_MODE,$args,-stats, -explain]")

STEP_ID=$(echo $STEP_INFO | jq .StepIds | tr -d '"' | tr -d ']' | tr -d '[' | tr -d '[:space:]' )
echo "Waiting for the step to finish"
aws emr wait step-complete --cluster-id $CLUSTER_ID --step-id $STEP_ID
aws emr wait step-complete --cluster-id $CLUSTER_ID --step-id $STEP_ID --region $REGION

aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stderr"
aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stdout"
aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stderr"
aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION --command "cat /mnt/var/log/hadoop/steps/$STEP_ID/stdout"
38 changes: 28 additions & 10 deletions scripts/aws/spinup_systemds_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,14 @@ set_config "SPARK_EXECUTOR_MEMORY" $SPARK_EXECUTOR_MEMORY
set_config "SPARK_DRIVER_MEMORY" "1G"
set_config "BUCKET" $BUCKET-$(((RANDOM % 999) + 1000))

#Source the config again so the values just written by set_config take effect in the current session
source systemds_cluster.config

#Create systemDS bucket
aws s3api create-bucket --bucket $BUCKET --region $REGION &> /dev/null
aws s3api create-bucket --bucket $BUCKET-logs --region $REGION &> /dev/null
#A LocationConstraint configuration is required for regions outside of us-east-1
if [ "$REGION" = "us-east-1" ]; then LOCATION_CONSTRAINT=""; else LOCATION_CONSTRAINT="--create-bucket-configuration LocationConstraint=$REGION"; fi
aws s3api create-bucket --bucket $BUCKET --region $REGION $LOCATION_CONSTRAINT &> /dev/null
aws s3api create-bucket --bucket $BUCKET-logs --region $REGION $LOCATION_CONSTRAINT &> /dev/null

# Upload Jar and scripts to s3
aws s3 sync $SYSTEMDS_TARGET_DIRECTORY s3://$BUCKET --exclude "*" --include "*.dml" --include "*config.xml" --include "*DS.jar*"
Expand All @@ -60,11 +65,17 @@ if [ ! -f ${KEYPAIR_NAME}.pem ]; then
echo "${KEYPAIR_NAME}.pem private key created!"
fi

#Get the first available subnet in the default VPC of the configured region
DEFAULT_SUBNET=$(aws ec2 describe-subnets --region $REGION \
--filter "Name=defaultForAz,Values=true" --query "Subnets[0].SubnetId" --output text)

#Create the cluster
#Note: Ganglia is not available since emr-6.15.0; it is replaced here with AmazonCloudWatchAgent
CLUSTER_INFO=$(aws emr create-cluster \
--applications Name=Ganglia Name=Spark \
--applications Name=AmazonCloudWatchAgent Name=Spark \
--ec2-attributes '{"KeyName":"'${KEYPAIR_NAME}'",
"InstanceProfile":"EMR_EC2_DefaultRole"}'\
"InstanceProfile":"EMR_EC2_DefaultRole",
"SubnetId": "'${DEFAULT_SUBNET}'"}'\
--service-role EMR_DefaultRole \
--enable-debugging \
--release-label $EMR_VERSION \
Expand All @@ -78,7 +89,13 @@ CLUSTER_INFO=$(aws emr create-cluster \
"InstanceGroupType":"CORE",
"InstanceType":"'${INSTANCES_TYPE}'",
"Name":"Core Instance Group"}]'\
--configurations '[{"Classification":"spark","Properties":{"maximizeResourceAllocation": "true"}}]'\
--configurations '[{"Classification":"spark","Properties":{"maximizeResourceAllocation": "true"}},
{"Classification": "spark-env",
"Configurations": [{
"Classification": "export",
"Properties": {"JAVA_HOME": "/usr/lib/jvm/jre-11"}
}]
}]'\
--scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
--region $REGION)

Expand All @@ -89,21 +106,22 @@ set_config "CLUSTER_ID" $CLUSTER_ID
ip_address=$(curl ipecho.net/plain ; echo)

#Add your ip to the security group
aws ec2 create-security-group --group-name ElasticMapReduce-master --description "info" &> /dev/null
aws ec2 create-security-group --group-name ElasticMapReduce-master --description "info" --region $REGION &> /dev/null
aws ec2 authorize-security-group-ingress \
--group-name ElasticMapReduce-master \
--protocol tcp \
--port 22 \
--cidr "${ip_address}"/24 &> /dev/null
--cidr "${ip_address}"/24 \
--region $REGION &> /dev/null

# Wait for cluster to start
echo "Waiting for cluster running state"
aws emr wait cluster-running --cluster-id $CLUSTER_ID
aws emr wait cluster-running --cluster-id $CLUSTER_ID --region $REGION

echo "Cluster info:"
export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID | jq .Cluster.MasterPublicDnsName | tr -d '"')
export CLUSTER_URL=$(aws emr describe-cluster --cluster-id $CLUSTER_ID --region $REGION | jq .Cluster.MasterPublicDnsName | tr -d '"')

aws emr ssh --cluster-id $CLUSTER_ID --key-pair-file ${KEYPAIR_NAME}.pem --region $REGION \
--command 'aws s3 cp s3://system-ds-bucket/target . --recursive --exclude "*" --include "*DS.jar*"'
--command 'aws s3 cp s3://'${BUCKET}' . --recursive --exclude "*" --include "*DS.jar*"'

echo "Spinup finished."
4 changes: 2 additions & 2 deletions scripts/aws/systemds_cluster.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

KEYPAIR_NAME="SystemDSkeynamex"
REGION="us-east-1"
BUCKET="systemds-bucket"
EMR_VERSION="emr-5.28.0"
BUCKET=systemds-bucket
EMR_VERSION="emr-7.0.0"

INSTANCES_TYPE="m5.xlarge"
MASTER_INSTANCES_COUNT=1
Expand Down
4 changes: 2 additions & 2 deletions scripts/aws/terminate_systemds_cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@

source systemds_cluster.config

aws emr terminate-clusters --cluster-ids $CLUSTER_ID
aws emr terminate-clusters --cluster-ids $CLUSTER_ID --region $REGION

# Wait for cluster to terminate
echo "Waiting for cluster terminated state"
aws emr wait cluster-terminated --cluster-id $CLUSTER_ID
aws emr wait cluster-terminated --cluster-id $CLUSTER_ID --region $REGION

echo "Cluster: ${CLUSTER_ID} terminated."