Skip to content

Commit 2024c0b

Browse files
committed
create new jobs, migrate tests
1 parent e1153cb commit 2024c0b

File tree

5 files changed

+192 -107 lines changed

5 files changed

+192 -107 lines changed

.github/workflows/pr-test-amd.yml

Lines changed: 184 additions & 104 deletions
Original file line number | Diff line number | Diff line change
@@ -676,67 +676,12 @@ jobs:
676676
run: |
677677
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-c-test-large-8-gpu-amd-mi35x --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 3600
678678
679-
performance-test-1-gpu-part-1-amd:
679+
stage-b-test-large-1-gpu-performance-amd:
680680
needs: [check-changes, stage-a-test-1-amd]
681681
if: |
682682
always() &&
683683
(
684-
(inputs.target_stage == 'performance-test-1-gpu-part-1-amd') ||
685-
(
686-
!inputs.target_stage &&
687-
(!failure() && !cancelled()) &&
688-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
689-
)
690-
)
691-
strategy:
692-
fail-fast: false
693-
matrix:
694-
runner: [linux-mi325-gpu-1]
695-
runs-on: ${{matrix.runner}}
696-
steps:
697-
- name: Checkout code
698-
uses: actions/checkout@v4
699-
with:
700-
ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
701-
702-
- name: Ensure VRAM is clear
703-
run: bash scripts/ensure_vram_clear.sh rocm
704-
705-
- name: Start CI container
706-
run: bash scripts/ci/amd_ci_start_container.sh
707-
env:
708-
GITHUB_WORKSPACE: ${{ github.workspace }}
709-
710-
- name: Install dependencies
711-
run: bash scripts/ci/amd_ci_install_dependency.sh
712-
713-
- name: Benchmark single latency
714-
timeout-minutes: 20
715-
run: |
716-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_1gpu.TestBenchOneBatch1GPU.test_bs1_small
717-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_1gpu.TestBenchOneBatch1GPU.test_bs1_default
718-
719-
- name: Benchmark online latency
720-
timeout-minutes: 15
721-
run: |
722-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_online_latency_default
723-
724-
- name: Benchmark offline throughput
725-
timeout-minutes: 15
726-
run: |
727-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_default
728-
729-
- name: Benchmark offline throughput (Non-streaming, small batch size)
730-
timeout-minutes: 15
731-
run: |
732-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_non_stream_small_batch_size
733-
734-
performance-test-1-gpu-part-2-amd:
735-
needs: [check-changes, stage-a-test-1-amd]
736-
if: |
737-
always() &&
738-
(
739-
(inputs.target_stage == 'performance-test-1-gpu-part-2-amd') ||
684+
(inputs.target_stage == 'stage-b-test-large-1-gpu-performance-amd') ||
740685
(
741686
!inputs.target_stage &&
742687
(!failure() && !cancelled()) &&
@@ -747,6 +692,7 @@ jobs:
747692
fail-fast: false
748693
matrix:
749694
runner: [linux-mi325-gpu-1]
695+
part: [0, 1]
750696
runs-on: ${{matrix.runner}}
751697
steps:
752698
- name: Checkout code
@@ -765,27 +711,17 @@ jobs:
765711
- name: Install dependencies
766712
run: bash scripts/ci/amd_ci_install_dependency.sh
767713

768-
- name: Benchmark offline throughput (w/o RadixAttention)
769-
timeout-minutes: 15
770-
run: |
771-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_without_radix_cache
772-
773-
- name: Benchmark offline throughput (w/ Triton)
774-
timeout-minutes: 15
775-
run: |
776-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_with_triton_attention_backend
777-
778-
- name: Benchmark offline throughput (w/ FP8)
779-
timeout-minutes: 15
714+
- name: Run test
715+
timeout-minutes: 30
780716
run: |
781-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_large.TestBenchServing1GPULarge.test_offline_throughput_default_fp8
717+
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-1-gpu-performance-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 1800
782718
783-
performance-test-2-gpu-amd:
719+
stage-b-test-large-2-gpu-performance-amd:
784720
needs: [check-changes, stage-a-test-1-amd]
785721
if: |
786722
always() &&
787723
(
788-
(inputs.target_stage == 'performance-test-2-gpu-amd') ||
724+
(inputs.target_stage == 'stage-b-test-large-2-gpu-performance-amd') ||
789725
(
790726
!inputs.target_stage &&
791727
(!failure() && !cancelled()) &&
@@ -796,6 +732,7 @@ jobs:
796732
fail-fast: false
797733
matrix:
798734
runner: [linux-mi325-gpu-2]
735+
part: [0, 1]
799736
runs-on: ${{matrix.runner}}
800737
steps:
801738
- name: Checkout code
@@ -814,40 +751,183 @@ jobs:
814751
- name: Install dependencies
815752
run: bash scripts/ci/amd_ci_install_dependency.sh
816753

817-
- name: Benchmark dummy grok (TP=2)
754+
- name: Run test
818755
timeout-minutes: 30
819756
run: |
820-
bash scripts/ci/amd_ci_exec.sh python3 models/test_dummy_grok_models.py
821-
822-
- name: Benchmark single latency (TP=2)
823-
timeout-minutes: 25
824-
run: |
825-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_2gpu.TestBenchOneBatch2GPU.test_moe_tp2_bs1
826-
827-
- name: Benchmark single latency + torch.compile (TP=2)
828-
timeout-minutes: 25
829-
run: |
830-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_2gpu.TestBenchOneBatch2GPU.test_torch_compile_tp2_bs1
831-
832-
- name: Benchmark offline throughput (TP=2)
833-
timeout-minutes: 25
834-
run: |
835-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_moe_offline_throughput_default
836-
837-
- name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
838-
timeout-minutes: 25
839-
run: |
840-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_moe_offline_throughput_without_radix_cache
841-
842-
- name: Benchmark offline PP decode throughput (PP=2)
843-
timeout-minutes: 10
844-
run: |
845-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_pp_offline_throughput_default_decode
846-
847-
- name: Benchmark offline PP prefill throughput (PP=2)
848-
timeout-minutes: 10
849-
run: |
850-
bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_pp_long_context_prefill
757+
bash scripts/ci/amd_ci_exec.sh -w "/sglang-checkout/test" python3 run_suite.py --hw amd --suite stage-b-test-large-2-gpu-performance-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 --timeout-per-file 1800
758+
759+
# performance-test-1-gpu-part-1-amd:
760+
# needs: [check-changes, stage-a-test-1-amd]
761+
# if: |
762+
# always() &&
763+
# (
764+
# (inputs.target_stage == 'performance-test-1-gpu-part-1-amd') ||
765+
# (
766+
# !inputs.target_stage &&
767+
# (!failure() && !cancelled()) &&
768+
# ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
769+
# )
770+
# )
771+
# strategy:
772+
# fail-fast: false
773+
# matrix:
774+
# runner: [linux-mi325-gpu-1]
775+
# runs-on: ${{matrix.runner}}
776+
# steps:
777+
# - name: Checkout code
778+
# uses: actions/checkout@v4
779+
# with:
780+
# ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
781+
782+
# - name: Ensure VRAM is clear
783+
# run: bash scripts/ensure_vram_clear.sh rocm
784+
785+
# - name: Start CI container
786+
# run: bash scripts/ci/amd_ci_start_container.sh
787+
# env:
788+
# GITHUB_WORKSPACE: ${{ github.workspace }}
789+
790+
# - name: Install dependencies
791+
# run: bash scripts/ci/amd_ci_install_dependency.sh
792+
793+
# - name: Benchmark single latency
794+
# timeout-minutes: 20
795+
# run: |
796+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_1gpu.TestBenchOneBatch1GPU.test_bs1_small
797+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_1gpu.TestBenchOneBatch1GPU.test_bs1_default
798+
799+
# - name: Benchmark online latency
800+
# timeout-minutes: 15
801+
# run: |
802+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_online_latency_default
803+
804+
# - name: Benchmark offline throughput
805+
# timeout-minutes: 15
806+
# run: |
807+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_default
808+
809+
# - name: Benchmark offline throughput (Non-streaming, small batch size)
810+
# timeout-minutes: 15
811+
# run: |
812+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_non_stream_small_batch_size
813+
814+
# performance-test-1-gpu-part-2-amd:
815+
# needs: [check-changes, stage-a-test-1-amd]
816+
# if: |
817+
# always() &&
818+
# (
819+
# (inputs.target_stage == 'performance-test-1-gpu-part-2-amd') ||
820+
# (
821+
# !inputs.target_stage &&
822+
# (!failure() && !cancelled()) &&
823+
# ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
824+
# )
825+
# )
826+
# strategy:
827+
# fail-fast: false
828+
# matrix:
829+
# runner: [linux-mi325-gpu-1]
830+
# runs-on: ${{matrix.runner}}
831+
# steps:
832+
# - name: Checkout code
833+
# uses: actions/checkout@v4
834+
# with:
835+
# ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
836+
837+
# - name: Ensure VRAM is clear
838+
# run: bash scripts/ensure_vram_clear.sh rocm
839+
840+
# - name: Start CI container
841+
# run: bash scripts/ci/amd_ci_start_container.sh
842+
# env:
843+
# GITHUB_WORKSPACE: ${{ github.workspace }}
844+
845+
# - name: Install dependencies
846+
# run: bash scripts/ci/amd_ci_install_dependency.sh
847+
848+
# - name: Benchmark offline throughput (w/o RadixAttention)
849+
# timeout-minutes: 15
850+
# run: |
851+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_without_radix_cache
852+
853+
# - name: Benchmark offline throughput (w/ Triton)
854+
# timeout-minutes: 15
855+
# run: |
856+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_part1.TestBenchServing1GPUPart1.test_offline_throughput_with_triton_attention_backend
857+
858+
# - name: Benchmark offline throughput (w/ FP8)
859+
# timeout-minutes: 15
860+
# run: |
861+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_1gpu_large.TestBenchServing1GPULarge.test_offline_throughput_default_fp8
862+
863+
# performance-test-2-gpu-amd:
864+
# needs: [check-changes, stage-a-test-1-amd]
865+
# if: |
866+
# always() &&
867+
# (
868+
# (inputs.target_stage == 'performance-test-2-gpu-amd') ||
869+
# (
870+
# !inputs.target_stage &&
871+
# (!failure() && !cancelled()) &&
872+
# ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
873+
# )
874+
# )
875+
# strategy:
876+
# fail-fast: false
877+
# matrix:
878+
# runner: [linux-mi325-gpu-2]
879+
# runs-on: ${{matrix.runner}}
880+
# steps:
881+
# - name: Checkout code
882+
# uses: actions/checkout@v4
883+
# with:
884+
# ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}
885+
886+
# - name: Ensure VRAM is clear
887+
# run: bash scripts/ensure_vram_clear.sh rocm
888+
889+
# - name: Start CI container
890+
# run: bash scripts/ci/amd_ci_start_container.sh
891+
# env:
892+
# GITHUB_WORKSPACE: ${{ github.workspace }}
893+
894+
# - name: Install dependencies
895+
# run: bash scripts/ci/amd_ci_install_dependency.sh
896+
897+
# - name: Benchmark dummy grok (TP=2)
898+
# timeout-minutes: 30
899+
# run: |
900+
# bash scripts/ci/amd_ci_exec.sh python3 models/test_dummy_grok_models.py
901+
902+
# - name: Benchmark single latency (TP=2)
903+
# timeout-minutes: 25
904+
# run: |
905+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_2gpu.TestBenchOneBatch2GPU.test_moe_tp2_bs1
906+
907+
# - name: Benchmark single latency + torch.compile (TP=2)
908+
# timeout-minutes: 25
909+
# run: |
910+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_one_batch_2gpu.TestBenchOneBatch2GPU.test_torch_compile_tp2_bs1
911+
912+
# - name: Benchmark offline throughput (TP=2)
913+
# timeout-minutes: 25
914+
# run: |
915+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_moe_offline_throughput_default
916+
917+
# - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
918+
# timeout-minutes: 25
919+
# run: |
920+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_moe_offline_throughput_without_radix_cache
921+
922+
# - name: Benchmark offline PP decode throughput (PP=2)
923+
# timeout-minutes: 10
924+
# run: |
925+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_pp_offline_throughput_default_decode
926+
927+
# - name: Benchmark offline PP prefill throughput (PP=2)
928+
# timeout-minutes: 10
929+
# run: |
930+
# bash scripts/ci/amd_ci_exec.sh -w /sglang-checkout/test/registered/perf python3 -m unittest test_bench_serving_2gpu.TestBenchServing2GPU.test_pp_long_context_prefill
851931

852932
accuracy-test-1-gpu-amd:
853933
needs: [check-changes, stage-a-test-1-amd]

test/registered/perf/test_bench_serving_1gpu_large.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import unittest
66

7-
from sglang.test.ci.ci_register import register_cuda_ci
7+
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
88
from sglang.test.test_utils import (
99
DEFAULT_DRAFT_MODEL_EAGLE,
1010
DEFAULT_MODEL_NAME_FOR_TEST_FP8,
@@ -17,6 +17,7 @@
1717
)
1818

1919
register_cuda_ci(est_time=300, suite="stage-b-test-large-1-gpu-performance")
20+
register_amd_ci(est_time=300, suite="stage-b-test-large-1-gpu-performance-amd")
2021

2122

2223
class TestBenchServing1GPULarge(CustomTestCase):

test/registered/perf/test_bench_serving_1gpu_part1.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010
import requests
1111

12-
from sglang.test.ci.ci_register import register_cuda_ci
12+
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
1313
from sglang.test.test_utils import (
1414
DEFAULT_MODEL_NAME_FOR_TEST,
1515
CustomTestCase,
@@ -20,6 +20,7 @@
2020
)
2121

2222
register_cuda_ci(est_time=1000, suite="stage-b-test-large-1-gpu-performance")
23+
register_amd_ci(est_time=1000, suite="stage-b-test-large-1-gpu-performance-amd")
2324

2425

2526
class TestBenchServing1GPUPart1(CustomTestCase):

test/registered/perf/test_bench_serving_2gpu.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import unittest
66

7-
from sglang.test.ci.ci_register import register_cuda_ci
7+
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
88
from sglang.test.test_utils import (
99
DEFAULT_MOE_MODEL_NAME_FOR_TEST,
1010
CustomTestCase,
@@ -15,6 +15,7 @@
1515
)
1616

1717
register_cuda_ci(est_time=600, suite="stage-b-test-large-2-gpu-performance")
18+
register_amd_ci(est_time=600, suite="stage-b-test-large-2-gpu-performance-amd")
1819

1920

2021
class TestBenchServing2GPU(CustomTestCase):

test/run_suite.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
"stage-b-test-small-1-gpu-amd",
2424
"stage-b-test-small-1-gpu-amd-mi35x",
2525
"stage-b-test-large-2-gpu-amd",
26+
"stage-b-test-large-1-gpu-performance-amd",
27+
"stage-b-test-large-2-gpu-performance-amd",
2628
"stage-c-test-large-8-gpu-amd-mi35x",
2729
],
2830
HWBackend.CUDA: [

0 commit comments

Comments
 (0)