Skip to content

MetaSim Pre-Merge CI Checks #279

MetaSim Pre-Merge CI Checks

MetaSim Pre-Merge CI Checks #279

Workflow file for this run

name: MetaSim Pre-Merge CI Checks

# Triggered from the merge queue (or manually). pull_request_target only
# drives the workflow-integrity check job below; the heavy test job is
# gated to merge_group / workflow_dispatch.
on:
  workflow_dispatch:
  merge_group:
    types: [checks_requested]
  pull_request_target:
    types:
      - auto_merge_enabled
    branches:
      - main
      - develop

env:
  REGION: us-west-2
  KEY_NAME: Github_CI_SSH_KEY_PAIR
  INSTANCE_TYPE: g5.2xlarge
  EC2_USER_NAME: ubuntu
  AZ: us-west-2a
  # Numeric values are quoted so they stay strings through the YAML parser.
  MAX_RETRIES: "5"
  RETRY_WAIT_TIME: "30"
  CACHE_BUCKET_PREFIX: "metasim-build-cache"
  ECR_REPOSITORY: "roboverse-dev"
jobs:
  # Provisions a GPU EC2 instance from CodeBuild, builds the test image on it
  # and runs the full simulator test matrix. Runs only for merge-queue or
  # manual dispatch events.
  pre-merge-tests-impl:
    if: github.event_name == 'merge_group' || github.event_name == 'workflow_dispatch'
    permissions:
      contents: read
      pull-requests: write
      issues: write
    runs-on: codebuild-EC2_Launcher2-${{ github.run_id }}-${{ github.run_attempt }}
    timeout-minutes: 720
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      # Sanity check that the AWS CLI is available on the CodeBuild runner.
      - run: aws --version
############# Prebuild ############
- name: pre_build
env:
SSH_KEY: ${{ secrets.EC2_SSH_KEY }}
run: |
# Get AWS account ID
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
echo "AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID" >> $GITHUB_ENV
if [ -z "$AWS_ACCOUNT_ID" ]; then
echo "Error: Failed to get AWS account ID"
exit 1
fi
echo "Preparing S3 bucket..."
CACHE_BUCKET="${CACHE_BUCKET_PREFIX}-${AWS_ACCOUNT_ID}"
aws s3api head-bucket --bucket $CACHE_BUCKET || \
aws s3 mb s3://$CACHE_BUCKET --region $REGION
# Configure S3 bucket lifecycle rule for cache expiration
aws s3api put-bucket-lifecycle-configuration \
--bucket $CACHE_BUCKET \
--lifecycle-configuration '{
"Rules": [
{
"ID": "ExpireBuildKitCache",
"Status": "Enabled",
"Filter": {
"Prefix": ""
},
"Expiration": {
"Days": 14
}
}
]
}'
echo "CACHE_BUCKET=$CACHE_BUCKET" >> $GITHUB_ENV
echo "Launching EC2 instance to run tests..."
INSTANCE_ID=$(aws ec2 run-instances \
--image-id ami-0b7f5f52689b2c0d0 \
--instance-type $INSTANCE_TYPE \
--region $REGION \
--key-name $KEY_NAME \
--security-group-ids sg-03f9110d8d39282ad \
--subnet-id subnet-0c56793ce29caa78b \
--iam-instance-profile Name="RoboverseCi" \
--block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":500}}]' \
--output text \
--query 'Instances[0].InstanceId')
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
# Create ECR repository if it doesn't exist
aws ecr describe-repositories --repository-names $ECR_REPOSITORY || \
aws ecr create-repository --repository-name $ECR_REPOSITORY
echo "Waiting for instance $INSTANCE_ID to be running..."
aws ec2 wait instance-running \
--instance-ids $INSTANCE_ID \
--region $REGION
echo "Getting instance IP address..."
EC2_INSTANCE_IP=$(aws ec2 describe-instances \
--region $REGION \
--filters "Name=instance-state-name,Values=running" "Name=instance-id,Values=$INSTANCE_ID" \
--query 'Reservations[*].Instances[*].[PrivateIpAddress]' \
--output text)
echo "EC2_INSTANCE_IP=$EC2_INSTANCE_IP" >> $GITHUB_ENV
echo "Setting up SSH configuration..."
mkdir -p ~/.ssh
aws ec2 describe-key-pairs \
--include-public-key \
--key-name $KEY_NAME \
--query 'KeyPairs[0].PublicKey' \
--output text > ~/.ssh/id_rsa.pub
echo "$SSH_KEY" > ~/.ssh/id_rsa
chmod 400 ~/.ssh/id_*
printf "Host %s\n\tStrictHostKeyChecking no\n\tUserKnownHostsFile=/dev/null\n" "$EC2_INSTANCE_IP" >> ~/.ssh/config
echo "Sending SSH public key to instance..."
aws ec2-instance-connect send-ssh-public-key \
--instance-id $INSTANCE_ID \
--availability-zone $AZ \
--ssh-public-key file://~/.ssh/id_rsa.pub \
--instance-os-user $EC2_USER_NAME
############# Build #############
- name: build
run: |
echo "====Copying source code...===="
wait_time=$RETRY_WAIT_TIME
SRC_DIR=$(basename $GITHUB_WORKSPACE)
echo "====Check environment variables...===="
echo "GITHUB_WORKSPACE=$GITHUB_WORKSPACE"
echo "CODEBUILD_SRC_DIR=$CODEBUILD_SRC_DIR"
echo "EC2_USER_NAME=$EC2_USER_NAME"
echo "SRC_DIR=$SRC_DIR"
echo "RETRY_WAIT_TIME=$RETRY_WAIT_TIME"
echo "MAX_RETRIES=$MAX_RETRIES"
echo "====Repo file check...===="
ls ./
# ==== before buildx build ====
DOCKERFILE_HASH=$(sha256sum Dockerfile | cut -c1-16)
IMAGE_URI="$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:df-$DOCKERFILE_HASH"
echo "IMAGE_URI=$IMAGE_URI"
retry_count=0
# change to parent directory to copy files
cd ..
while [ $retry_count -lt $MAX_RETRIES ]; do
if [ $retry_count -gt 0 ]; then
wait_time=$((wait_time * 2))
echo "Retry attempt $((retry_count + 1))/$MAX_RETRIES. Waiting $wait_time seconds..."
sleep $wait_time
fi
if scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no -r $SRC_DIR $EC2_USER_NAME@$EC2_INSTANCE_IP:~; then
echo "SCP command succeeded"
break
fi
retry_count=$((retry_count + 1))
done
if [ $retry_count -eq $MAX_RETRIES ]; then
echo "SCP command failed after $MAX_RETRIES attempts"
exit 1
fi
# login
ECR_LOGIN_TOKEN=$(aws ecr get-login-password --region $REGION)
echo "====Running tests on EC2 instance...===="
ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no $EC2_USER_NAME@$EC2_INSTANCE_IP "
set -euo pipefail
# Login to ECR using token from CodeBuild
echo \"$ECR_LOGIN_TOKEN\" | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com
# Configure BuildKit environment
export DOCKER_BUILDKIT=1
export BUILDKIT_INLINE_CACHE=1
docker buildx create --name metasim-builder --driver docker-container \
--driver-opt env.AWS_REGION=$REGION \
--driver-opt env.AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
--driver-opt env.AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
--bootstrap
docker buildx use metasim-builder
cd \"\$HOME/${SRC_DIR}\"
# docker build
if docker pull "$IMAGE_URI" 2>/dev/null ; then
echo "Image $IMAGE_URI already exists. Skipping build."
else
echo "===Starting docker build.==="
docker buildx build --progress=plain --platform linux/amd64 \
-t "$IMAGE_URI" \
--cache-from type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:cache,mode=max \
--cache-to type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:cache,mode=max \
--build-arg DOCKER_UID=1000 \
--build-arg DOCKER_GID=1000 \
--build-arg DOCKER_USER=$EC2_USER_NAME \
-f Dockerfile \
--load .
docker push "$IMAGE_URI"
fi
# begin run test
GENERAL_TEST_EXIT_CODE=0
MUJOCO_TEST_EXIT_CODE=0
SAPIEN3_TEST_EXIT_CODE=0
ISAACSIM_TEST_EXIT_CODE=0
ISAACGYM_TEST_EXIT_CODE=0
# run all test
# Run general tests (no simulator required)
docker run --rm --entrypoint bash --runtime=nvidia --network=host \
--name metasim-autotest \
--user 1000:1000 --privileged \
-e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
-e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
-v /usr/local/cuda:/usr/local/cuda \
-v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
"$IMAGE_URI" \
-c "bash -lc 'set -o pipefail; \
/home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k general -vv \
| tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-general.log'" \
|| GENERAL_TEST_EXIT_CODE=$?
docker run --rm --entrypoint bash --runtime=nvidia --network=host \
--name metasim-autotest \
--user 1000:1000 --privileged \
-e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
-e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
-v /usr/local/cuda:/usr/local/cuda \
-v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
"$IMAGE_URI" \
-c "bash -lc 'set -o pipefail; \
/home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k mujoco -vv \
| tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-mujoco.log'" \
|| MUJOCO_TEST_EXIT_CODE=$?
docker run --rm --entrypoint bash --runtime=nvidia --network=host \
--name metasim-autotest \
--user 1000:1000 --privileged \
-e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
-e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
-v /usr/local/cuda:/usr/local/cuda \
-v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
"$IMAGE_URI" \
-c "bash -lc 'set -o pipefail; \
/home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k sapien3 -vv \
| tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-sapien3.log'" \
|| SAPIEN3_TEST_EXIT_CODE=$?
docker run --rm --entrypoint bash --runtime=nvidia --network=host \
--name metasim-autotest \
--user 1000:1000 --privileged \
-e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
-e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
-v /usr/local/cuda:/usr/local/cuda \
-v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
"$IMAGE_URI" \
-c "bash -lc 'set -o pipefail; \
/home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k isaacsim -vv \
| tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-isaacsim.log'" \
|| ISAACSIM_TEST_EXIT_CODE=$?
docker run --rm --entrypoint bash --runtime=nvidia --network=host \
--name metasim-autotest \
--user 1000:1000 --privileged \
-e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
-e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
-v /usr/local/cuda:/usr/local/cuda \
-v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
"$IMAGE_URI" \
-c "bash -lc 'set -o pipefail; \
/home/$EC2_USER_NAME/conda/envs/metasim_isaacgym/bin/python3 /home/$EC2_USER_NAME/RoboVerse/metasim/test/isaacgym_entry.py -k isaacgym -vv \
| tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-isaacgym.log'" \
|| ISAACGYM_TEST_EXIT_CODE=$?
# TODO check if test_exit_code necessary
touch ~/$SRC_DIR/test_exit_codes.txt
{
echo \"GENERAL_TEST_EXIT_CODE=\$GENERAL_TEST_EXIT_CODE\"
echo \"MUJOCO_TEST_EXIT_CODE=\$MUJOCO_TEST_EXIT_CODE\"
echo \"SAPIEN3_TEST_EXIT_CODE=\$SAPIEN3_TEST_EXIT_CODE\"
echo \"ISAACSIM_TEST_EXIT_CODE=\$ISAACSIM_TEST_EXIT_CODE\"
echo \"ISAACGYM_TEST_EXIT_CODE=\$ISAACGYM_TEST_EXIT_CODE\"
} > ~/${SRC_DIR}/test_exit_codes.txt
" || { echo "Test execution failed"; exit 1; }
echo "===Copying test reports...==="
scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/test_exit_codes.txt $CODEBUILD_SRC_DIR/
source $CODEBUILD_SRC_DIR/test_exit_codes.txt
echo "General test exit code: ${GENERAL_TEST_EXIT_CODE}"
echo "Mujoco test exit code: ${MUJOCO_TEST_EXIT_CODE}"
echo "Sapien3 test exit code: ${SAPIEN3_TEST_EXIT_CODE}"
echo "IsaacSim test exit code: ${ISAACSIM_TEST_EXIT_CODE}"
echo "IsaacGym test exit code: ${ISAACGYM_TEST_EXIT_CODE}"
EXIT_CODE=0
if [ "${GENERAL_TEST_EXIT_CODE:-0}" -ne 0 ]; then
echo "=== General tests failed. Fetching logs... ==="
scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
$EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-general.log \
$CODEBUILD_SRC_DIR/ || true
echo "===== General pytest log ====="
cat $CODEBUILD_SRC_DIR/pytest-general.log || true
EXIT_CODE=1
fi
if [ "${MUJOCO_TEST_EXIT_CODE:-0}" -ne 0 ]; then
echo "=== Mujoco tests failed. Fetching logs... ==="
scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
$EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-mujoco.log \
$CODEBUILD_SRC_DIR/ || true
echo "===== Mujoco pytest log ====="
cat $CODEBUILD_SRC_DIR/pytest-mujoco.log || true
EXIT_CODE=1
fi
if [ "${SAPIEN3_TEST_EXIT_CODE:-0}" -ne 0 ]; then
echo "=== Sapien3 tests failed. Fetching logs... ==="
scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
$EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-sapien3.log \
$CODEBUILD_SRC_DIR/ || true
echo "===== Sapien3 pytest log ====="
cat $CODEBUILD_SRC_DIR/pytest-sapien3.log || true
EXIT_CODE=1
fi
if [ "${ISAACSIM_TEST_EXIT_CODE:-0}" -ne 0 ]; then
echo "=== IsaacSim tests failed. Fetching logs... ==="
scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
$EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-isaacsim.log \
$CODEBUILD_SRC_DIR/ || true
echo "===== IsaacSim pytest log ====="
cat $CODEBUILD_SRC_DIR/pytest-isaacsim.log || true
EXIT_CODE=1
fi
if [ "${ISAACGYM_TEST_EXIT_CODE:-0}" -ne 0 ]; then
echo "=== IsaacGym tests failed. Fetching logs... ==="
scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
$EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-isaacgym.log \
$CODEBUILD_SRC_DIR/ || true
echo "===== IsaacGym pytest log ====="
cat $CODEBUILD_SRC_DIR/pytest-isaacgym.log || true
EXIT_CODE=1
fi
if [ "$EXIT_CODE" -ne 0 ]; then
echo "Tests failed with exit code $EXIT_CODE"
exit 1
else
echo "===All tests passed!==="
fi
########### Postbuild #########
- name: post_build
if: always() # always try to terminate the instance
run: |
echo "Cleaning up resources..."
if [ -n "$INSTANCE_ID" ]; then
echo "Terminating EC2 instance $INSTANCE_ID..."
aws ec2 terminate-instances --instance-ids $INSTANCE_ID --region $REGION || true
fi
- name: Prepare test logs for upload
if: always()
run: |
# Copy test logs from CODEBUILD_SRC_DIR to workspace root for artifact upload
if [ -d "$CODEBUILD_SRC_DIR" ]; then
cp -v $CODEBUILD_SRC_DIR/pytest-*.log . 2>/dev/null || echo "No pytest logs found"
cp -v $CODEBUILD_SRC_DIR/test_exit_codes.txt . 2>/dev/null || echo "No exit codes file found"
else
echo "CODEBUILD_SRC_DIR not set, files should already be in workspace"
fi
- name: Upload test logs as artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: test-logs
path: |
pytest-*.log
test_exit_codes.txt
if-no-files-found: warn
retention-days: 7
pre-merge-tests:
if: always()
needs: [workflow-integrity-check, pre-merge-tests-impl]
runs-on: ubuntu-latest
steps:
- run: |
if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
if [[ "${{ needs.workflow-integrity-check.result }}" != "success" ]]; then
echo "❌ Workflow integrity check failed."
exit 1
fi
echo "✅ Workflow integrity verified. Ready for merge queue."
elif [[ "${{ github.event_name }}" == "merge_group" || "${{ github.event_name }}" == "workflow_dispatch" ]]; then
if [[ "${{ needs.pre-merge-tests-impl.result }}" != "success" ]]; then
echo "❌ Tests failed."
exit 1
fi
echo "✅ Tests passed."
fi
workflow-integrity-check:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request_target'
permissions:
pull-requests: read
steps:
- name: Check for workflow changes
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
set -euo pipefail
echo "Checking if .github/workflows/premerge-ci.yml is modified in PR #$PR_NUMBER..."
CHANGES=$(gh pr diff "$PR_NUMBER" --name-only -R ${{ github.repository }})
if echo "$CHANGES" | grep -q "^.github/workflows/premerge-ci.yml$"; then
echo "❌ Critical workflow modification detected!"
echo "For security reasons, this workflow file cannot be modified via Pull Request."
echo "Please revert changes to .github/workflows/premerge-ci.yml to pass this check."
exit 1
fi
echo "✅ Workflow integrity verified (file not modified)."