-
Notifications
You must be signed in to change notification settings - Fork 155
421 lines (370 loc) · 17.1 KB
/
premerge-ci.yml
File metadata and controls
421 lines (370 loc) · 17.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
name: MetaSim Pre-Merge CI Checks

# Triggers:
#   - workflow_dispatch: manual runs.
#   - merge_group: merge-queue check requests (runs the full test job).
#   - pull_request_target on main/develop when auto-merge is enabled: only
#     runs the workflow-integrity-check job (see the job-level `if:` guards).
on:
  workflow_dispatch:
  merge_group:
    types: [checks_requested]
  pull_request_target:
    types:
      - auto_merge_enabled
    branches:
      - main
      - develop

# Shared configuration for the EC2-backed test run.
env:
  REGION: us-west-2
  KEY_NAME: Github_CI_SSH_KEY_PAIR
  INSTANCE_TYPE: g5.2xlarge
  EC2_USER_NAME: ubuntu
  AZ: us-west-2a
  MAX_RETRIES: "5"
  RETRY_WAIT_TIME: "30"
  CACHE_BUCKET_PREFIX: "metasim-build-cache"
  ECR_REPOSITORY: "roboverse-dev"
jobs:
  pre-merge-tests:
    # Only run the expensive GPU/EC2 test suite for merge-queue or manual runs;
    # pull_request_target events are handled by workflow-integrity-check below.
    if: github.event_name == 'merge_group' || github.event_name == 'workflow_dispatch'
    permissions:
      contents: read
      pull-requests: write
      issues: write
    # CodeBuild-hosted runner; the run id/attempt make the label unique per run.
    runs-on: codebuild-EC2_Launcher2-${{ github.run_id }}-${{ github.run_attempt }}
    # Generous budget: image build + five simulator test suites on one instance.
    timeout-minutes: 720
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      # Sanity check that the AWS CLI is available on the runner.
      - run: aws --version
      ############# Prebuild ############
      # Provision the build-cache bucket, launch the GPU EC2 instance that will
      # run the tests, and set up SSH access to it. Exports AWS_ACCOUNT_ID,
      # CACHE_BUCKET, INSTANCE_ID and EC2_INSTANCE_IP via GITHUB_ENV for the
      # later steps.
      - name: pre_build
        env:
          SSH_KEY: ${{ secrets.EC2_SSH_KEY }}
        run: |
          # Get AWS account ID
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          echo "AWS_ACCOUNT_ID=$AWS_ACCOUNT_ID" >> $GITHUB_ENV
          if [ -z "$AWS_ACCOUNT_ID" ]; then
            echo "Error: Failed to get AWS account ID"
            exit 1
          fi
          echo "Preparing S3 bucket..."
          CACHE_BUCKET="${CACHE_BUCKET_PREFIX}-${AWS_ACCOUNT_ID}"
          # Create the bucket only if it does not already exist.
          aws s3api head-bucket --bucket $CACHE_BUCKET || \
            aws s3 mb s3://$CACHE_BUCKET --region $REGION
          # Configure S3 bucket lifecycle rule for cache expiration (14 days).
          aws s3api put-bucket-lifecycle-configuration \
            --bucket $CACHE_BUCKET \
            --lifecycle-configuration '{
              "Rules": [
                {
                  "ID": "ExpireBuildKitCache",
                  "Status": "Enabled",
                  "Filter": {
                    "Prefix": ""
                  },
                  "Expiration": {
                    "Days": 14
                  }
                }
              ]
            }'
          echo "CACHE_BUCKET=$CACHE_BUCKET" >> $GITHUB_ENV
          echo "Launching EC2 instance to run tests..."
          # NOTE(review): AMI, security-group and subnet IDs are hard-coded —
          # confirm they remain valid for the target account/region.
          INSTANCE_ID=$(aws ec2 run-instances \
            --image-id ami-0b7f5f52689b2c0d0 \
            --instance-type $INSTANCE_TYPE \
            --region $REGION \
            --key-name $KEY_NAME \
            --security-group-ids sg-03f9110d8d39282ad \
            --subnet-id subnet-0c56793ce29caa78b \
            --iam-instance-profile Name="RoboverseCi" \
            --block-device-mappings '[{"DeviceName":"/dev/sda1","Ebs":{"VolumeSize":500}}]' \
            --output text \
            --query 'Instances[0].InstanceId')
          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
          # Create ECR repository if it doesn't exist
          aws ecr describe-repositories --repository-names $ECR_REPOSITORY || \
            aws ecr create-repository --repository-name $ECR_REPOSITORY
          echo "Waiting for instance $INSTANCE_ID to be running..."
          aws ec2 wait instance-running \
            --instance-ids $INSTANCE_ID \
            --region $REGION
          echo "Getting instance IP address..."
          # Private IP is used: the runner and the instance share the VPC/subnet.
          EC2_INSTANCE_IP=$(aws ec2 describe-instances \
            --region $REGION \
            --filters "Name=instance-state-name,Values=running" "Name=instance-id,Values=$INSTANCE_ID" \
            --query 'Reservations[*].Instances[*].[PrivateIpAddress]' \
            --output text)
          echo "EC2_INSTANCE_IP=$EC2_INSTANCE_IP" >> $GITHUB_ENV
          echo "Setting up SSH configuration..."
          mkdir -p ~/.ssh
          # Fetch the key pair's public half; the private half comes from secrets.
          aws ec2 describe-key-pairs \
            --include-public-key \
            --key-name $KEY_NAME \
            --query 'KeyPairs[0].PublicKey' \
            --output text > ~/.ssh/id_rsa.pub
          echo "$SSH_KEY" > ~/.ssh/id_rsa
          chmod 400 ~/.ssh/id_*
          # Disable host-key verification for the freshly launched instance.
          printf "Host %s\n\tStrictHostKeyChecking no\n\tUserKnownHostsFile=/dev/null\n" "$EC2_INSTANCE_IP" >> ~/.ssh/config
          echo "Sending SSH public key to instance..."
          aws ec2-instance-connect send-ssh-public-key \
            --instance-id $INSTANCE_ID \
            --availability-zone $AZ \
            --ssh-public-key file://~/.ssh/id_rsa.pub \
            --instance-os-user $EC2_USER_NAME
      ############# Build #############
      # Copy the checked-out source to the EC2 instance, build (or reuse) the
      # Docker image keyed on the Dockerfile hash, run the five pytest suites
      # (general + one per simulator), then copy exit codes and failing logs
      # back to this runner and fail the step if any suite failed.
      - name: build
        run: |
          echo "====Copying source code...===="
          wait_time=$RETRY_WAIT_TIME
          SRC_DIR=$(basename $GITHUB_WORKSPACE)
          echo "====Check environment variables...===="
          echo "GITHUB_WORKSPACE=$GITHUB_WORKSPACE"
          echo "CODEBUILD_SRC_DIR=$CODEBUILD_SRC_DIR"
          echo "EC2_USER_NAME=$EC2_USER_NAME"
          echo "SRC_DIR=$SRC_DIR"
          echo "RETRY_WAIT_TIME=$RETRY_WAIT_TIME"
          echo "MAX_RETRIES=$MAX_RETRIES"
          echo "====Repo file check...===="
          ls ./
          # ==== before buildx build ====
          # Tag the image by Dockerfile hash so an unchanged Dockerfile reuses
          # the previously pushed image instead of rebuilding.
          DOCKERFILE_HASH=$(sha256sum Dockerfile | cut -c1-16)
          IMAGE_URI="$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:df-$DOCKERFILE_HASH"
          echo "IMAGE_URI=$IMAGE_URI"
          retry_count=0
          # change to parent directory to copy files
          cd ..
          # Retry the copy with exponential backoff — SSH on the instance may
          # not be ready immediately after launch.
          while [ $retry_count -lt $MAX_RETRIES ]; do
            if [ $retry_count -gt 0 ]; then
              wait_time=$((wait_time * 2))
              echo "Retry attempt $((retry_count + 1))/$MAX_RETRIES. Waiting $wait_time seconds..."
              sleep $wait_time
            fi
            if scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no -r $SRC_DIR $EC2_USER_NAME@$EC2_INSTANCE_IP:~; then
              echo "SCP command succeeded"
              break
            fi
            retry_count=$((retry_count + 1))
          done
          if [ $retry_count -eq $MAX_RETRIES ]; then
            echo "SCP command failed after $MAX_RETRIES attempts"
            exit 1
          fi
          # login
          ECR_LOGIN_TOKEN=$(aws ecr get-login-password --region $REGION)
          echo "====Running tests on EC2 instance...===="
          # Quoting contract for the ssh command below: unescaped $VAR and "..."
          # expand on THIS runner before the command is sent; \$VAR and \"...\"
          # are evaluated on the EC2 instance.
          ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no $EC2_USER_NAME@$EC2_INSTANCE_IP "
          set -euo pipefail
          # Login to ECR using token from CodeBuild
          echo \"$ECR_LOGIN_TOKEN\" | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com
          # Configure BuildKit environment
          export DOCKER_BUILDKIT=1
          export BUILDKIT_INLINE_CACHE=1
          # NOTE(review): AWS creds are handed to the builder (presumably for an
          # S3 cache backend), but the cache flags below target the ECR registry
          # and the CACHE_BUCKET prepared in pre_build is never referenced in
          # this step — confirm the bucket/creds are still needed.
          docker buildx create --name metasim-builder --driver docker-container \
            --driver-opt env.AWS_REGION=$REGION \
            --driver-opt env.AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
            --driver-opt env.AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
            --bootstrap
          docker buildx use metasim-builder
          cd \"\$HOME/${SRC_DIR}\"
          # docker build
          if docker pull "$IMAGE_URI" 2>/dev/null ; then
            echo "Image $IMAGE_URI already exists. Skipping build."
          else
            echo "===Starting docker build.==="
            docker buildx build --progress=plain --platform linux/amd64 \
              -t "$IMAGE_URI" \
              --cache-from type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:cache,mode=max \
              --cache-to type=registry,ref=$AWS_ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/$ECR_REPOSITORY:cache,mode=max \
              --build-arg DOCKER_UID=1000 \
              --build-arg DOCKER_GID=1000 \
              --build-arg DOCKER_USER=$EC2_USER_NAME \
              -f Dockerfile \
              --load .
            docker push "$IMAGE_URI"
          fi
          # begin run test
          # Each suite records its own exit code so one failure does not stop
          # the remaining suites (set -e is bypassed via '|| CODE=\$?').
          GENERAL_TEST_EXIT_CODE=0
          MUJOCO_TEST_EXIT_CODE=0
          SAPIEN3_TEST_EXIT_CODE=0
          ISAACSIM_TEST_EXIT_CODE=0
          ISAACGYM_TEST_EXIT_CODE=0
          # run all test
          # Run general tests (no simulator required)
          docker run --rm --entrypoint bash --runtime=nvidia --network=host \
            --name metasim-autotest \
            --user 1000:1000 --privileged \
            -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
            -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
            -v /usr/local/cuda:/usr/local/cuda \
            -v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
            "$IMAGE_URI" \
            -c "bash -lc 'set -o pipefail; \
            /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k general -vv \
            | tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-general.log'" \
            || GENERAL_TEST_EXIT_CODE=$?
          docker run --rm --entrypoint bash --runtime=nvidia --network=host \
            --name metasim-autotest \
            --user 1000:1000 --privileged \
            -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
            -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
            -v /usr/local/cuda:/usr/local/cuda \
            -v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
            "$IMAGE_URI" \
            -c "bash -lc 'set -o pipefail; \
            /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k mujoco -vv \
            | tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-mujoco.log'" \
            || MUJOCO_TEST_EXIT_CODE=$?
          docker run --rm --entrypoint bash --runtime=nvidia --network=host \
            --name metasim-autotest \
            --user 1000:1000 --privileged \
            -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
            -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
            -v /usr/local/cuda:/usr/local/cuda \
            -v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
            "$IMAGE_URI" \
            -c "bash -lc 'set -o pipefail; \
            /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k sapien3 -vv \
            | tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-sapien3.log'" \
            || SAPIEN3_TEST_EXIT_CODE=$?
          docker run --rm --entrypoint bash --runtime=nvidia --network=host \
            --name metasim-autotest \
            --user 1000:1000 --privileged \
            -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
            -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
            -v /usr/local/cuda:/usr/local/cuda \
            -v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
            "$IMAGE_URI" \
            -c "bash -lc 'set -o pipefail; \
            /home/$EC2_USER_NAME/conda/envs/metasim/bin/python3 -m pytest -k isaacsim -vv \
            | tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-isaacsim.log'" \
            || ISAACSIM_TEST_EXIT_CODE=$?
          # IsaacGym uses a separate conda env and a dedicated entry script.
          docker run --rm --entrypoint bash --runtime=nvidia --network=host \
            --name metasim-autotest \
            --user 1000:1000 --privileged \
            -e LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu \
            -e ACCEPT_EULA=Y -e PRIVACY_CONSENT=Y -e OMNI_KIT_ACCEPT_EULA=YES \
            -v /usr/local/cuda:/usr/local/cuda \
            -v "$(pwd)":/home/$EC2_USER_NAME/RoboVerse \
            "$IMAGE_URI" \
            -c "bash -lc 'set -o pipefail; \
            /home/$EC2_USER_NAME/conda/envs/metasim_isaacgym/bin/python3 /home/$EC2_USER_NAME/RoboVerse/metasim/test/isaacgym_entry.py -k isaacgym -vv \
            | tee /home/$EC2_USER_NAME/${SRC_DIR}/pytest-isaacgym.log'" \
            || ISAACGYM_TEST_EXIT_CODE=$?
          # TODO check if test_exit_code necessary
          touch ~/$SRC_DIR/test_exit_codes.txt
          {
            echo \"GENERAL_TEST_EXIT_CODE=\$GENERAL_TEST_EXIT_CODE\"
            echo \"MUJOCO_TEST_EXIT_CODE=\$MUJOCO_TEST_EXIT_CODE\"
            echo \"SAPIEN3_TEST_EXIT_CODE=\$SAPIEN3_TEST_EXIT_CODE\"
            echo \"ISAACSIM_TEST_EXIT_CODE=\$ISAACSIM_TEST_EXIT_CODE\"
            echo \"ISAACGYM_TEST_EXIT_CODE=\$ISAACGYM_TEST_EXIT_CODE\"
          } > ~/${SRC_DIR}/test_exit_codes.txt
          " || { echo "Test execution failed"; exit 1; }
          echo "===Copying test reports...==="
          # Bring the per-suite exit codes back and source them into this shell.
          scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/test_exit_codes.txt $CODEBUILD_SRC_DIR/
          source $CODEBUILD_SRC_DIR/test_exit_codes.txt
          echo "General test exit code: ${GENERAL_TEST_EXIT_CODE}"
          echo "Mujoco test exit code: ${MUJOCO_TEST_EXIT_CODE}"
          echo "Sapien3 test exit code: ${SAPIEN3_TEST_EXIT_CODE}"
          echo "IsaacSim test exit code: ${ISAACSIM_TEST_EXIT_CODE}"
          echo "IsaacGym test exit code: ${ISAACGYM_TEST_EXIT_CODE}"
          # For each failed suite, fetch and print its log (best-effort: scp/cat
          # failures are tolerated so the step still reports the right status).
          EXIT_CODE=0
          if [ "${GENERAL_TEST_EXIT_CODE:-0}" -ne 0 ]; then
            echo "=== General tests failed. Fetching logs... ==="
            scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
              $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-general.log \
              $CODEBUILD_SRC_DIR/ || true
            echo "===== General pytest log ====="
            cat $CODEBUILD_SRC_DIR/pytest-general.log || true
            EXIT_CODE=1
          fi
          if [ "${MUJOCO_TEST_EXIT_CODE:-0}" -ne 0 ]; then
            echo "=== Mujoco tests failed. Fetching logs... ==="
            scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
              $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-mujoco.log \
              $CODEBUILD_SRC_DIR/ || true
            echo "===== Mujoco pytest log ====="
            cat $CODEBUILD_SRC_DIR/pytest-mujoco.log || true
            EXIT_CODE=1
          fi
          if [ "${SAPIEN3_TEST_EXIT_CODE:-0}" -ne 0 ]; then
            echo "=== Sapien3 tests failed. Fetching logs... ==="
            scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
              $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-sapien3.log \
              $CODEBUILD_SRC_DIR/ || true
            echo "===== Sapien3 pytest log ====="
            cat $CODEBUILD_SRC_DIR/pytest-sapien3.log || true
            EXIT_CODE=1
          fi
          if [ "${ISAACSIM_TEST_EXIT_CODE:-0}" -ne 0 ]; then
            echo "=== IsaacSim tests failed. Fetching logs... ==="
            scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
              $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-isaacsim.log \
              $CODEBUILD_SRC_DIR/ || true
            echo "===== IsaacSim pytest log ====="
            cat $CODEBUILD_SRC_DIR/pytest-isaacsim.log || true
            EXIT_CODE=1
          fi
          if [ "${ISAACGYM_TEST_EXIT_CODE:-0}" -ne 0 ]; then
            echo "=== IsaacGym tests failed. Fetching logs... ==="
            scp -o ConnectTimeout=10 -o StrictHostKeyChecking=no \
              $EC2_USER_NAME@$EC2_INSTANCE_IP:~/$SRC_DIR/pytest-isaacgym.log \
              $CODEBUILD_SRC_DIR/ || true
            echo "===== IsaacGym pytest log ====="
            cat $CODEBUILD_SRC_DIR/pytest-isaacgym.log || true
            EXIT_CODE=1
          fi
          if [ "$EXIT_CODE" -ne 0 ]; then
            echo "Tests failed with exit code $EXIT_CODE"
            exit 1
          else
            echo "===All tests passed!==="
          fi
      ########### Postbuild #########
      # Tear down the EC2 instance and surface test logs as artifacts even when
      # earlier steps failed.
      - name: post_build
        if: always() # always try to terminate the instance
        run: |
          echo "Cleaning up resources..."
          # INSTANCE_ID was exported to GITHUB_ENV by pre_build; it may be
          # empty if that step failed before launching the instance.
          if [ -n "$INSTANCE_ID" ]; then
            echo "Terminating EC2 instance $INSTANCE_ID..."
            aws ec2 terminate-instances --instance-ids $INSTANCE_ID --region $REGION || true
          fi
      - name: Prepare test logs for upload
        if: always()
        run: |
          # Copy test logs from CODEBUILD_SRC_DIR to workspace root for artifact upload
          if [ -d "$CODEBUILD_SRC_DIR" ]; then
            cp -v $CODEBUILD_SRC_DIR/pytest-*.log . 2>/dev/null || echo "No pytest logs found"
            cp -v $CODEBUILD_SRC_DIR/test_exit_codes.txt . 2>/dev/null || echo "No exit codes file found"
          else
            echo "CODEBUILD_SRC_DIR not set, files should already be in workspace"
          fi
      - name: Upload test logs as artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-logs
          path: |
            pytest-*.log
            test_exit_codes.txt
          if-no-files-found: warn
          retention-days: 7
workflow-integrity-check:
runs-on: ubuntu-latest
if: github.event_name == 'pull_request_target'
permissions:
pull-requests: read
steps:
- name: Check for workflow changes
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
set -euo pipefail
echo "Checking if .github/workflows/premerge-ci.yml is modified in PR #$PR_NUMBER..."
CHANGES=$(gh pr diff "$PR_NUMBER" --name-only)
if echo "$CHANGES" | grep -q "^.github/workflows/premerge-ci.yml$"; then
echo "❌ Critical workflow modification detected!"
echo "For security reasons, this workflow file cannot be modified via Pull Request."
echo "Please revert changes to .github/workflows/premerge-ci.yml to pass this check."
exit 1
fi
echo "✅ Workflow integrity verified (file not modified)."