Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ on:
env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
FAIL_CONFIG: ${{ github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
Expand All @@ -42,7 +42,7 @@ jobs:
env:
PYTHON_VERSION: ${{ matrix.pyver }}
PLAT_NAME: manylinux2014_x86_64
DOCKER_TAG: cuda11.8
DOCKER_TAG: cuda12.4
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down Expand Up @@ -108,7 +108,7 @@ jobs:
- name: Install lmdeploy
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Install lmdeploy - offline
if: ${{inputs.offline_mode}}
Expand Down
39 changes: 19 additions & 20 deletions .github/workflows/daily_ete_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ on:
env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
OUTPUT_FOLDER: cuda12.4_dist_${{ github.run_id }}
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
Expand All @@ -64,7 +64,7 @@ jobs:
env:
PYTHON_VERSION: ${{ matrix.pyver }}
PLAT_NAME: manylinux2014_x86_64
DOCKER_TAG: cuda11.8
DOCKER_TAG: cuda12.4
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down Expand Up @@ -96,7 +96,7 @@ jobs:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand Down Expand Up @@ -136,7 +136,7 @@ jobs:
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -168,7 +168,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
Expand Down Expand Up @@ -219,7 +219,7 @@ jobs:
MODELSCOPE_CACHE: /nvme/qa_test_models/modelscope_hub
MODELSCOPE_MODULES_CACHE: /nvme/qa_test_models/modelscope_modules
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -251,7 +251,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - chat
continue-on-error: true
Expand Down Expand Up @@ -324,7 +324,7 @@ jobs:
model: Intern-S1
timeout-minutes: 60
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -352,7 +352,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Start restful api
if: matrix.model != 'internlm2_5-20b'
Expand Down Expand Up @@ -408,7 +408,7 @@ jobs:
needs: test_quantization
timeout-minutes: 120
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -436,7 +436,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - interface pipeline case
run: |
Expand Down Expand Up @@ -465,7 +465,7 @@ jobs:
needs: test_quantization
timeout-minutes: 120
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -493,7 +493,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test benchmark script
run: |
Expand All @@ -520,7 +520,7 @@ jobs:
matrix:
evaluate_type: ['chat', 'base']
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down Expand Up @@ -550,8 +550,7 @@ jobs:
run: |
git clone --depth=1 https://github.com/open-compass/opencompass.git
cd opencompass
cp /nvme/qa_test_models/offline_pkg/requirements-oc.txt requirements/runtime.txt
python3 -m pip install -e .
python3 -m pip install .
echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV
- name: Check env
run: |
Expand All @@ -560,7 +559,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Setup paths for evaluation
run: |
Expand All @@ -571,7 +570,7 @@ jobs:
run: |
export LMDEPLOY_DIR=$(pwd)

python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_7b_instruct, pytorch_qwen2_7b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it]" "[*race_datasets, *gsm8k_datasets, *ifeval_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b_chat, pytorch_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_batch1, turbomind_internlm2_5_7b_chat_batch1_4bits, turbomind_internlm3_8b_instruct, pytorch_internlm3_8b_instruct, turbomind_internlm2_5_20b_chat, pytorch_internlm2_5_20b_chat, turbomind_qwen1_5_7b_chat, pytorch_qwen1_5_7b_chat, turbomind_llama3_8b_instruct, pytorch_llama3_8b_instruct, turbomind_llama3_1_8b_instruct, pytorch_llama3_1_8b_instruct, turbomind_qwen2_5_7b_instruct, pytorch_qwen2_5_7b_instruct, turbomind_llama2_7b_chat, pytorch_qwen1_5_moe_2_7b_chat, pytorch_gemma_2_9b_it, pytorch_gemma_2_27b_it]" "[*race_datasets, *gsm8k_datasets, *ifeval_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
- name: Evaluate base models
if: matrix.evaluate_type == 'base'
run: |
Expand All @@ -594,7 +593,7 @@ jobs:
timeout-minutes: 5
runs-on: [self-hosted, linux-a100]
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/qa_test_models:/nvme/qa_test_models
Expand All @@ -619,7 +618,7 @@ jobs:
needs: [test_tools, test_restful, test_pipeline, test_benchmark]
timeout-minutes: 5
container:
image: openmmlab/lmdeploy:latest-cu12.8
image: openmmlab/lmdeploy:latest-cu12
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
volumes:
- /nvme/github-actions/pip-cache:/root/.cache/pip
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/daily_ete_test_3090.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ jobs:
python3 -m pip install -r requirements/lite.txt
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install transformers==4.53.1 datasets==3.6.0 timm
python3 -m pip install -r requirements/test.txt
- name: Check env
Expand All @@ -163,7 +163,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
Expand Down Expand Up @@ -226,7 +226,7 @@ jobs:
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
Expand All @@ -235,7 +235,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - chat
continue-on-error: true
Expand Down Expand Up @@ -290,7 +290,7 @@ jobs:
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
Expand All @@ -299,7 +299,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Start restful api turbomind
if: matrix.backend == 'turbomind'
Expand Down Expand Up @@ -370,7 +370,7 @@ jobs:
run: cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Get coverage report
run: |
Expand Down
24 changes: 12 additions & 12 deletions .github/workflows/daily_ete_test_5080.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
download_pkgs:
needs: linux-build
if: ${{!cancelled()}}
runs-on: [self-hosted, 5090-r1]
runs-on: [self-hosted, 5080-r1]
timeout-minutes: 50
container:
image: openmmlab/lmdeploy:latest-cu12.8
Expand Down Expand Up @@ -129,7 +129,7 @@ jobs:
test_quantization:
needs: download_pkgs
if: ${{!cancelled() && contains(needs.download_pkgs.result, 'success') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
runs-on: [self-hosted, 5090-r1]
runs-on: [self-hosted, 5080-r1]
timeout-minutes: 150
env:
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
Expand All @@ -153,7 +153,7 @@ jobs:
python3 -m pip install -r requirements/lite.txt
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install transformers==4.53.1 datasets==3.6.0 timm
python3 -m pip install -r requirements/test.txt
- name: Check env
Expand All @@ -163,7 +163,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
Expand All @@ -188,7 +188,7 @@ jobs:
chmod -R 777 $workdir
test_tools:
if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
runs-on: [self-hosted, 5090-r1]
runs-on: [self-hosted, 5080-r1]
needs: test_quantization
timeout-minutes: 300
strategy:
Expand Down Expand Up @@ -225,7 +225,7 @@ jobs:
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
Expand All @@ -234,7 +234,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Test lmdeploy - chat
continue-on-error: true
Expand Down Expand Up @@ -265,7 +265,7 @@ jobs:
chmod -R 777 $workdir
test_restful:
if: ${{!cancelled() && !contains(needs.test_quantization.result, 'fail') && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
runs-on: [self-hosted, 5090-r1]
runs-on: [self-hosted, 5080-r1]
needs: test_quantization
strategy:
fail-fast: false
Expand All @@ -289,7 +289,7 @@ jobs:
python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Check env
run: |
Expand All @@ -298,7 +298,7 @@ jobs:
rm -rf allure-results
# remove tmp log in testcase
rm -rf /nvme/qa_test_models/autotest_model/log/*
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p && rm autotest/.pytest_cache -f
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
- name: Start restful api turbomind
if: matrix.backend == 'turbomind'
Expand Down Expand Up @@ -353,7 +353,7 @@ jobs:
chmod -R 777 $workdir
get_coverage_report:
if: ${{!cancelled() && success()}}
runs-on: [self-hosted, 5090-r1]
runs-on: [self-hosted, 5080-r1]
needs: [test_tools, test_restful]
timeout-minutes: 5
container:
Expand All @@ -368,7 +368,7 @@ jobs:
run: cp -r ${{env.TEST_CODE_PATH}}/. .
- name: Install lmdeploy
run: |
python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
python3 -m pip install -r requirements/test.txt
- name: Get coverage report
run: |
Expand Down
Loading