MetaSim Pre-Merge CI Checks #276
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Pre-merge CI: launches an EC2 instance, builds the project image on it and runs the test suites there. | |
| name: MetaSim Pre-Merge CI Checks | |
| # Triggers: manual dispatch, merge-queue check requests, and auto-merge being enabled on PRs into main/develop. | |
| on: | |
| workflow_dispatch: | |
| merge_group: | |
| types: | |
| - checks_requested | |
| pull_request_target: | |
| types: [auto_merge_enabled] | |
| branches: [main, develop] | |
| # Settings read by the shell steps of this workflow. | |
| env: | |
| # Region and availability zone used for the AWS CLI calls | |
| REGION: us-west-2 | |
| AZ: us-west-2a | |
| # EC2 key pair, instance type and login user for the test instance | |
| KEY_NAME: Github_CI_SSH_KEY_PAIR | |
| INSTANCE_TYPE: g5.2xlarge | |
| EC2_USER_NAME: ubuntu | |
| # Retry budget and initial wait (seconds) for copying the sources to the instance | |
| MAX_RETRIES: '5' | |
| RETRY_WAIT_TIME: '30' | |
| # The S3 cache bucket is named <prefix>-<account id>; images are pushed to this ECR repository | |
| CACHE_BUCKET_PREFIX: 'metasim-build-cache' | |
| ECR_REPOSITORY: 'roboverse-dev' | |
| jobs: | |
| # Full build-and-test job; only runs for manually dispatched and merge-queue events. | |
| pre-merge-tests: | |
| if: github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group' | |
| timeout-minutes: 720 | |
| # The runner label embeds the run id and attempt number of this workflow run. | |
| runs-on: codebuild-EC2_Launcher2-${{ github.run_id }}-${{ github.run_attempt }} | |
| permissions: {contents: read, pull-requests: write, issues: write} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| name: Checkout code | |
| # Print the AWS CLI version so it is recorded in the job log. | |
| - run: aws --version | |
| ############# Prebuild ############ | |
| # Provisions what the build step depends on: the S3 cache bucket, the ECR repository | |
| # and a fresh EC2 instance reachable over SSH. | |
| # Exports AWS_ACCOUNT_ID, CACHE_BUCKET, INSTANCE_ID and EC2_INSTANCE_IP to later steps via GITHUB_ENV. | |
| - name: pre_build | |
| env: | |
| SSH_KEY: ${{ secrets.EC2_SSH_KEY }} | |
| run: | | |
| # Get AWS account ID; validate it before exporting so later steps never receive an empty value | |
| AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) | |
| if [ -z "$AWS_ACCOUNT_ID" ]; then | |
| echo "Error: Failed to get AWS account ID" | |
| exit 1 | |
| fi | |
| echo "AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID" >> "$GITHUB_ENV" | |
| echo "Preparing S3 bucket..." | |
| CACHE_BUCKET="${CACHE_BUCKET_PREFIX}-${AWS_ACCOUNT_ID}" | |
| # Create the bucket only when head-bucket cannot access it | |
| aws s3api head-bucket --bucket "$CACHE_BUCKET" || \ | |
| aws s3 mb "s3://$CACHE_BUCKET" --region "$REGION" | |
| # Configure S3 bucket lifecycle rule for cache expiration | |
| aws s3api put-bucket-lifecycle-configuration \ | |
| --bucket "$CACHE_BUCKET" \ | |
| --lifecycle-configuration '{ | |
| "Rules": [ | |
| { | |
| "ID": "ExpireBuildKitCache", | |
| "Status": "Enabled", | |
| "Filter": { | |
| "Prefix": "" | |
| }, | |
| "Expiration": { | |
| "Days": 14 | |
| } | |
| } | |
| ] | |
| }' | |
| echo "CACHE_BUCKET=$CACHE_BUCKET" >> "$GITHUB_ENV" | |
| echo "Launching EC2 instance to run tests..." | |
| INSTANCE_ID=$(aws ec2 run-instances \ | |
| --image-id ami-0b7f5f52689b2c0d0 \ | |
| --instance-type "$INSTANCE_TYPE" \ | |
| --region "$REGION" \ | |
| --key-name "$KEY_NAME" \ | |
| --security-group-ids sg-03f9110d8d39282ad \ | |
| --subnet-id subnet-0c56793ce29caa78b \ | |
| --iam-instance-profile Name="RoboverseCi" \ | |
| --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":500}}]' \ | |
| --output text \ | |
| --query 'Instances[0].InstanceId') | |
| # Export the instance id immediately so post_build can terminate it even if a later command fails | |
| echo "INSTANCE_ID=$INSTANCE_ID" >> "$GITHUB_ENV" | |
| # Create ECR repository if it doesn't exist | |
| aws ecr describe-repositories --repository-names "$ECR_REPOSITORY" || \ | |
| aws ecr create-repository --repository-name "$ECR_REPOSITORY" | |
| echo "Waiting for instance $INSTANCE_ID to be running..." | |
| aws ec2 wait instance-running \ | |
| --instance-ids "$INSTANCE_ID" \ | |
| --region "$REGION" | |
| echo "Getting instance IP address..." | |
| EC2_INSTANCE_IP=$(aws ec2 describe-instances \ | |
| --region "$REGION" \ | |
| --filters "Name=instance-state-name,Values=running" "Name=instance-id,Values=$INSTANCE_ID" \ | |
| --query 'Reservations[*].Instances[*].[PrivateIpAddress]' \ | |
| --output text) | |
| echo "EC2_INSTANCE_IP=$EC2_INSTANCE_IP" >> "$GITHUB_ENV" | |
| echo "Setting up SSH configuration..." | |
| mkdir -p ~/.ssh | |
| aws ec2 describe-key-pairs \ | |
| --include-public-key \ | |
| --key-name "$KEY_NAME" \ | |
| --query 'KeyPairs[0].PublicKey' \ | |
| --output text > ~/.ssh/id_rsa.pub | |
| # Write the private key under a restrictive umask so it is never readable by other users, even briefly | |
| ( umask 077 && printf '%s\n' "$SSH_KEY" > ~/.ssh/id_rsa ) | |
| chmod 400 ~/.ssh/id_* | |
| # Host-key checking is disabled for this one address only | |
| printf "Host %s\n\tStrictHostKeyChecking no\n\tUserKnownHostsFile=/dev/null\n" "$EC2_INSTANCE_IP" >> ~/.ssh/config | |
| echo "Sending SSH public key to instance..." | |
| aws ec2-instance-connect send-ssh-public-key \ | |
| --instance-id "$INSTANCE_ID" \ | |
| --availability-zone "$AZ" \ | |
| --ssh-public-key file://~/.ssh/id_rsa.pub \ | |
| --instance-os-user "$EC2_USER_NAME" | |
| ############# Build ############# | |
| - name: build | |
| run: | | |
| echo "====Copying source code...====" | |
| # Initial delay between copy attempts; doubled before every retry | |
| backoff=$RETRY_WAIT_TIME | |
| # Name of the checkout directory; it is copied under the remote home directory with the same name | |
| SRC_DIR=$(basename $GITHUB_WORKSPACE) | |
| echo "====Check environment variables...====" | |
| # Dump the settings this step relies on as NAME=value lines | |
| for var_name in GITHUB_WORKSPACE CODEBUILD_SRC_DIR EC2_USER_NAME SRC_DIR RETRY_WAIT_TIME MAX_RETRIES; do | |
| echo "$var_name=${!var_name}" | |
| done | |
| echo "====Repo file check...====" | |
| ls ./ | |
| # ==== before buildx build ==== | |
| # The image tag is derived from the Dockerfile contents (first 16 hex chars of its SHA-256) | |
| DOCKERFILE_HASH=$(sha256sum Dockerfile | cut -c1-16) | |
| IMAGE_URI="$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:df-$DOCKERFILE_HASH" | |
| echo "IMAGE_URI=$IMAGE_URI" | |
| # change to parent directory to copy files | |
| cd .. | |
| # Try the copy up to MAX_RETRIES times, sleeping with exponential backoff between attempts | |
| attempt=1 | |
| copied=false | |
| while [ $attempt -le $MAX_RETRIES ]; do | |
| if [ $attempt -gt 1 ]; then | |
| backoff=$((backoff * 2)) | |
| echo "Retry attempt $attempt/$MAX_RETRIES. Waiting $backoff seconds..." | |
| sleep $backoff | |
| fi | |
| if scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no -r $SRC_DIR $EC2_USER_NAME@$EC2_INSTANCE_IP:~; then | |
| echo "SCP command succeeded" | |
| copied=true | |
| break | |
| fi | |
| attempt=$((attempt + 1)) | |
| done | |
| if [ "$copied" != true ]; then | |
| echo "SCP command failed after $MAX_RETRIES attempts" | |
| exit 1 | |
| fi | |
| # login | |
| ECR_LOGIN_TOKEN=$(aws ecr get-login-password --region "$REGION") | |
| echo "====Running tests on EC2 instance...====" | |
| # The remote script is a single double-quoted string. Plain $VAR references are expanded here on the | |
| # runner before the script is sent; \$VAR, \$(...) and \$? are escaped so the EC2 host evaluates them. | |
| # Every inner double quote must be written as \" - an unescaped one ends the string early and lets the | |
| # runner reinterpret what follows (this previously froze all test exit codes and mounted the wrong path). | |
| # NOTE(review): the ECR token and AWS credentials are interpolated into the remote command line; | |
| # consider passing them over stdin instead. | |
| ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no "$EC2_USER_NAME@$EC2_INSTANCE_IP" " | |
| set -euo pipefail | |
| # Login to ECR using token from CodeBuild | |
| echo \"$ECR_LOGIN_TOKEN\" | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com | |
| # Configure BuildKit environment | |
| export DOCKER_BUILDKIT=1 | |
| export BUILDKIT_INLINE_CACHE=1 | |
| docker buildx create --name metasim-builder --driver docker-container \ | |
| --driver-opt env.AWS_REGION=$REGION \ | |
| --driver-opt env.AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \ | |
| --driver-opt env.AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \ | |
| --bootstrap | |
| docker buildx use metasim-builder | |
| cd \"\$HOME/${SRC_DIR}\" | |
| # docker build | |
| # Reuse the image for this Dockerfile hash when it is already in the registry | |
| if docker pull \"$IMAGE_URI\" 2>/dev/null ; then | |
| echo \"Image $IMAGE_URI already exists. Skipping build.\" | |
| else | |
| echo \"===Starting docker build.===\" | |
| docker buildx build --progress=plain --platform linux/amd64 \ | |
| -t \"$IMAGE_URI\" \ | |
| --cache-from type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:cache,mode=max \ | |
| --cache-to type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:cache,mode=max \ | |
| --build-arg DOCKER_UID=1000 \ | |
| --build-arg DOCKER_GID=1000 \ | |
| --build-arg DOCKER_USER=$EC2_USER_NAME \ | |
| -f Dockerfile \ | |
| --load . | |
| docker push \"$IMAGE_URI\" | |
| fi | |
| # begin run test | |
| GENERAL_TEST_EXIT_CODE=0 | |
| MUJOCO_TEST_EXIT_CODE=0 | |
| SAPIEN3_TEST_EXIT_CODE=0 | |
| ISAACSIM_TEST_EXIT_CODE=0 | |
| ISAACGYM_TEST_EXIT_CODE=0 | |
| # run all test | |
| # Each suite bind-mounts the remote checkout and tees its log into that mount, | |
| # so the log ends up in the checkout directory on the EC2 host. | |
| # A failing suite only records its exit status; the remaining suites still run. | |
| # Run general tests (no simulator required) | |
| docker run --rm --entrypoint bash --runtime=nvidia --network=host \ | |
| --name metasim-autotest \ | |
| --user 1000:1000 --privileged \ | |
| -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \ | |
| -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \ | |
| -v /usr/local/cuda:/usr/local/cuda \ | |
| -v \"\$(pwd)\":/home/$EC2_USER_NAME/RoboVerse \ | |
| \"$IMAGE_URI\" \ | |
| -c \"bash -lc 'set -o pipefail; \ | |
| /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k general -vv \ | |
| | tee /home/$EC2_USER_NAME/RoboVerse/pytest-general.log'\" \ | |
| || GENERAL_TEST_EXIT_CODE=\$? | |
| # Run mujoco tests | |
| docker run --rm --entrypoint bash --runtime=nvidia --network=host \ | |
| --name metasim-autotest \ | |
| --user 1000:1000 --privileged \ | |
| -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \ | |
| -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \ | |
| -v /usr/local/cuda:/usr/local/cuda \ | |
| -v \"\$(pwd)\":/home/$EC2_USER_NAME/RoboVerse \ | |
| \"$IMAGE_URI\" \ | |
| -c \"bash -lc 'set -o pipefail; \ | |
| /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k mujoco -vv \ | |
| | tee /home/$EC2_USER_NAME/RoboVerse/pytest-mujoco.log'\" \ | |
| || MUJOCO_TEST_EXIT_CODE=\$? | |
| # Run sapien3 tests | |
| docker run --rm --entrypoint bash --runtime=nvidia --network=host \ | |
| --name metasim-autotest \ | |
| --user 1000:1000 --privileged \ | |
| -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \ | |
| -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \ | |
| -v /usr/local/cuda:/usr/local/cuda \ | |
| -v \"\$(pwd)\":/home/$EC2_USER_NAME/RoboVerse \ | |
| \"$IMAGE_URI\" \ | |
| -c \"bash -lc 'set -o pipefail; \ | |
| /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k sapien3 -vv \ | |
| | tee /home/$EC2_USER_NAME/RoboVerse/pytest-sapien3.log'\" \ | |
| || SAPIEN3_TEST_EXIT_CODE=\$? | |
| # Run isaacsim tests | |
| docker run --rm --entrypoint bash --runtime=nvidia --network=host \ | |
| --name metasim-autotest \ | |
| --user 1000:1000 --privileged \ | |
| -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \ | |
| -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \ | |
| -v /usr/local/cuda:/usr/local/cuda \ | |
| -v \"\$(pwd)\":/home/$EC2_USER_NAME/RoboVerse \ | |
| \"$IMAGE_URI\" \ | |
| -c \"bash -lc 'set -o pipefail; \ | |
| /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k isaacsim -vv \ | |
| | tee /home/$EC2_USER_NAME/RoboVerse/pytest-isaacsim.log'\" \ | |
| || ISAACSIM_TEST_EXIT_CODE=\$? | |
| # Run isaacgym tests through the dedicated entry script and conda environment | |
| docker run --rm --entrypoint bash --runtime=nvidia --network=host \ | |
| --name metasim-autotest \ | |
| --user 1000:1000 --privileged \ | |
| -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \ | |
| -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \ | |
| -v /usr/local/cuda:/usr/local/cuda \ | |
| -v \"\$(pwd)\":/home/$EC2_USER_NAME/RoboVerse \ | |
| \"$IMAGE_URI\" \ | |
| -c \"bash -lc 'set -o pipefail; \ | |
| /home/$EC2_USER_NAME/conda/envs/metasim_isaacgym/bin/python3 /home/$EC2_USER_NAME/RoboVerse/metasim/test/isaacgym_entry.py -k isaacgym -vv \ | |
| | tee /home/$EC2_USER_NAME/RoboVerse/pytest-isaacgym.log'\" \ | |
| || ISAACGYM_TEST_EXIT_CODE=\$? | |
| # TODO check if test_exit_code necessary | |
| touch ~/$SRC_DIR/test_exit_codes.txt | |
| { | |
| echo \"GENERAL_TEST_EXIT_CODE=\$GENERAL_TEST_EXIT_CODE\" | |
| echo \"MUJOCO_TEST_EXIT_CODE=\$MUJOCO_TEST_EXIT_CODE\" | |
| echo \"SAPIEN3_TEST_EXIT_CODE=\$SAPIEN3_TEST_EXIT_CODE\" | |
| echo \"ISAACSIM_TEST_EXIT_CODE=\$ISAACSIM_TEST_EXIT_CODE\" | |
| echo \"ISAACGYM_TEST_EXIT_CODE=\$ISAACGYM_TEST_EXIT_CODE\" | |
| } > ~/${SRC_DIR}/test_exit_codes.txt | |
| " || { echo "Test execution failed"; exit 1; } | |
| echo "===Copying test reports...===" | |
| # Pull the per-suite exit codes written on the instance and load them as shell variables | |
| scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/test_exit_codes.txt $CODEBUILD_SRC_DIR/ | |
| source $CODEBUILD_SRC_DIR/test_exit_codes.txt | |
| echo "General test exit code: ${GENERAL_TEST_EXIT_CODE}" | |
| echo "Mujoco test exit code: ${MUJOCO_TEST_EXIT_CODE}" | |
| echo "Sapien3 test exit code: ${SAPIEN3_TEST_EXIT_CODE}" | |
| echo "IsaacSim test exit code: ${ISAACSIM_TEST_EXIT_CODE}" | |
| echo "IsaacGym test exit code: ${ISAACGYM_TEST_EXIT_CODE}" | |
| EXIT_CODE=0 | |
| # fetch_log_if_failed LABEL SLUG CODE | |
| # When CODE is non-zero: copy pytest-SLUG.log back from the instance (best effort), | |
| # print it, and mark the overall run as failed by setting EXIT_CODE=1. | |
| fetch_log_if_failed() { | |
| [ "$3" -ne 0 ] || return 0 | |
| echo "=== $1 tests failed. Fetching logs... ===" | |
| scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \ | |
| $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-$2.log \ | |
| $CODEBUILD_SRC_DIR/ || true | |
| echo "===== $1 pytest log =====" | |
| cat $CODEBUILD_SRC_DIR/pytest-$2.log || true | |
| EXIT_CODE=1 | |
| } | |
| # A missing or empty exit code is treated as success (0) | |
| fetch_log_if_failed "General" general "${GENERAL_TEST_EXIT_CODE:-0}" | |
| fetch_log_if_failed "Mujoco" mujoco "${MUJOCO_TEST_EXIT_CODE:-0}" | |
| fetch_log_if_failed "Sapien3" sapien3 "${SAPIEN3_TEST_EXIT_CODE:-0}" | |
| fetch_log_if_failed "IsaacSim" isaacsim "${ISAACSIM_TEST_EXIT_CODE:-0}" | |
| fetch_log_if_failed "IsaacGym" isaacgym "${ISAACGYM_TEST_EXIT_CODE:-0}" | |
| if [ "$EXIT_CODE" -eq 0 ]; then | |
| echo "===All tests passed!===" | |
| else | |
| echo "Tests failed with exit code $EXIT_CODE" | |
| exit 1 | |
| fi | |
| ########### Postbuild ######### | |
| # Terminates the EC2 instance launched by pre_build. Runs even when earlier steps failed. | |
| # Termination stays best-effort (the step never fails), but a failed attempt is surfaced as a | |
| # warning annotation instead of being silently ignored, since the instance would keep running. | |
| - name: post_build | |
| if: always() # always try to terminate the instance | |
| run: | | |
| echo "Cleaning up resources..." | |
| # INSTANCE_ID is only set once pre_build has launched an instance | |
| if [ -n "${INSTANCE_ID:-}" ]; then | |
| echo "Terminating EC2 instance $INSTANCE_ID..." | |
| if ! aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" --region "$REGION"; then | |
| echo "::warning::Failed to terminate EC2 instance $INSTANCE_ID in $REGION; terminate it manually." | |
| fi | |
| fi | |
| # Stages the fetched logs and the exit-code file in the current directory for the upload step. | |
| - name: Prepare test logs for upload | |
| if: always() | |
| run: | | |
| if [ ! -d "$CODEBUILD_SRC_DIR" ]; then | |
| echo "CODEBUILD_SRC_DIR not set, files should already be in workspace" | |
| else | |
| # Copy test logs from CODEBUILD_SRC_DIR to workspace root for artifact upload | |
| if ! cp -v $CODEBUILD_SRC_DIR/pytest-*.log . 2>/dev/null; then | |
| echo "No pytest logs found" | |
| fi | |
| if ! cp -v $CODEBUILD_SRC_DIR/test_exit_codes.txt . 2>/dev/null; then | |
| echo "No exit codes file found" | |
| fi | |
| fi | |
| # Publishes the pytest logs and exit-code file as a run artifact, even when earlier steps failed. | |
| # Missing files only produce a warning; the artifact is kept for 7 days. | |
| - name: Upload test logs as artifacts | |
| uses: actions/upload-artifact@v4 | |
| if: always() | |
| with: | |
| name: test-logs | |
| retention-days: 7 | |
| path: | | |
| pytest-*.log | |
| test_exit_codes.txt | |
| if-no-files-found: warn | |
| # Guard job for pull_request_target events: fails when the PR touches this workflow file. | |
| workflow-integrity-check: | |
| runs-on: ubuntu-latest | |
| if: github.event_name == 'pull_request_target' | |
| permissions: | |
| pull-requests: read | |
| steps: | |
| - name: Check for workflow changes | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| PR_NUMBER: ${{ github.event.pull_request.number }} | |
| run: | | |
| set -euo pipefail | |
| PROTECTED_FILE=".github/workflows/premerge-ci.yml" | |
| echo "Checking if $PROTECTED_FILE is modified in PR #$PR_NUMBER..." | |
| # This job has no checkout step, so the repository is named explicitly instead of | |
| # relying on gh to infer it from a local clone. | |
| CHANGES=$(gh pr diff "$PR_NUMBER" --repo "$GITHUB_REPOSITORY" --name-only) | |
| # Exact, literal whole-line match fed through a here-string: with pipefail, piping echo into | |
| # grep -q can report failure (SIGPIPE) on a long file list and would let a modified workflow pass. | |
| if grep -Fxq -- "$PROTECTED_FILE" <<< "$CHANGES"; then | |
| echo "❌ Critical workflow modification detected!" | |
| echo "For security reasons, this workflow file cannot be modified via Pull Request." | |
| echo "Please revert changes to .github/workflows/premerge-ci.yml to pass this check." | |
| exit 1 | |
| fi | |
| echo "✅ Workflow integrity verified (file not modified)." | |