From 6d41fe8b5b1b7916b2cda74579016fbfad8df052 Mon Sep 17 00:00:00 2001 From: Xiaodong Ye Date: Mon, 12 Jan 2026 14:12:49 +0800 Subject: [PATCH] [MUSA] Add shell script to generate requirements-musa.txt and update doc Signed-off-by: Xiaodong Ye --- .gitignore | 1 + .../source/getting_started/installation.rst | 12 +- .../source/getting_started/installation.rst | 22 ++-- generate_requirements_musa.sh | 105 ++++++++++++++++++ 4 files changed, 127 insertions(+), 13 deletions(-) create mode 100755 generate_requirements_musa.sh diff --git a/.gitignore b/.gitignore index 6049c2cdbe..63408699f4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ dist .idea .vscode tmp/ +requirements-musa.txt diff --git a/docs/CN/source/getting_started/installation.rst b/docs/CN/source/getting_started/installation.rst index fb998b7567..5fa0e304d2 100755 --- a/docs/CN/source/getting_started/installation.rst +++ b/docs/CN/source/getting_started/installation.rst @@ -27,7 +27,7 @@ Lightllm 是一个纯python开发的推理框架,其中的算子使用triton $ # 前请确保你的docker设置中已经分配了足够的共享内存,否则可能导致 $ # 服务无法正常启动。 $ # 1.如果是纯文本服务,建议分配2GB以上的共享内存, 如果你的内存充足,建议分配16GB以上的共享内存. - $ # 2.如果是多模态服务,建议分配16GB以上的共享内存,具体可以根据实际情况进行调整. + $ # 2.如果是多模态服务,建议分配16GB以上的共享内存,具体可以根据实际情况进行调整. $ # 如果你没有足够的共享内存,可以尝试在启动服务的时候调低 --running_max_req_size 参数,这会降低 $ # 服务的并发请求数量,但可以减少共享内存的占用。如果是多模态服务,也可以通过降低 --cache_capacity $ # 参数来减少共享内存的占用。 @@ -38,7 +38,7 @@ Lightllm 是一个纯python开发的推理框架,其中的算子使用triton 你也可以使用源码手动构建镜像并运行,建议手动构建镜像,因为更新比较频繁: .. code-block:: console - + $ # 进入代码仓库的根目录 $ cd /lightllm $ # 手动构建镜像, docker 目录下有不同功能场景的镜像构建文件,按需构建。 @@ -52,7 +52,7 @@ Lightllm 是一个纯python开发的推理框架,其中的算子使用triton 或者你也可以直接使用脚本一键启动镜像并且运行: .. 
code-block:: console - + $ # 查看脚本参数 $ python tools/quick_launch_docker.py --help @@ -80,6 +80,10 @@ Lightllm 是一个纯python开发的推理框架,其中的算子使用triton $ # 安装lightllm的依赖 (cuda 12.4) $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124 $ + $ # 安装lightllm的依赖 (摩尔线程 GPU) + $ ./generate_requirements_musa.sh + $ pip install -r requirements-musa.txt + $ $ # 安装lightllm $ python setup.py install @@ -97,6 +101,6 @@ Lightllm 是一个纯python开发的推理框架,其中的算子使用triton .. code-block:: console $ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --no-deps - + 具体原因可以参考:`issue `_ 和 `fix PR `_ diff --git a/docs/EN/source/getting_started/installation.rst b/docs/EN/source/getting_started/installation.rst index 75fa714764..6439c48de3 100755 --- a/docs/EN/source/getting_started/installation.rst +++ b/docs/EN/source/getting_started/installation.rst @@ -24,16 +24,16 @@ The easiest way to install Lightllm is using the official image. You can directl $ docker pull ghcr.io/modeltc/lightllm:main $ $ # Run,The current LightLLM service relies heavily on shared memory. - $ # Before starting, please make sure that you have allocated enough shared memory + $ # Before starting, please make sure that you have allocated enough shared memory $ # in your Docker settings; otherwise, the service may fail to start properly. $ # - $ # 1. For text-only services, it is recommended to allocate more than 2GB of shared memory. + $ # 1. For text-only services, it is recommended to allocate more than 2GB of shared memory. $ # If your system has sufficient RAM, allocating 16GB or more is recommended. - $ # 2.For multimodal services, it is recommended to allocate 16GB or more of shared memory. + $ # 2.For multimodal services, it is recommended to allocate 16GB or more of shared memory. $ # You can adjust this value according to your specific requirements. 
$ # - $ # If you do not have enough shared memory available, you can try lowering - $ # the --running_max_req_size parameter when starting the service. + $ # If you do not have enough shared memory available, you can try lowering + $ # the --running_max_req_size parameter when starting the service. $ # This will reduce the number of concurrent requests, but also decrease shared memory usage. $ docker run -it --gpus all -p 8080:8080 \ $ --shm-size 2g -v your_local_path:/data/ \ @@ -42,13 +42,13 @@ The easiest way to install Lightllm is using the official image. You can directl You can also manually build the image from source and run it: .. code-block:: console - + $ # move into lightllm root dir $ cd /lightllm $ # Manually build the image $ docker build -t -f ./docker/Dockerfile . $ - $ # Run, + $ # Run, $ docker run -it --gpus all -p 8080:8080 \ $ --shm-size 2g -v your_local_path:/data/ \ $ /bin/bash @@ -56,7 +56,7 @@ You can also manually build the image from source and run it: Or you can directly use the script to launch the image and run it with one click: .. code-block:: console - + $ # View script parameters $ python tools/quick_launch_docker.py --help @@ -84,6 +84,10 @@ You can also install Lightllm from source: $ # Install Lightllm dependencies (cuda 12.4) $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124 $ + $ # Install Lightllm dependencies (Moore Threads GPU) + $ ./generate_requirements_musa.sh + $ pip install -r requirements-musa.txt + $ $ # Install Lightllm $ python setup.py install @@ -101,5 +105,5 @@ You can also install Lightllm from source: .. 
code-block:: console $ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --no-deps - + For specific reasons, please refer to: `issue `_ and `fix PR `_ \ No newline at end of file
diff --git a/generate_requirements_musa.sh b/generate_requirements_musa.sh
new file mode 100755
index 0000000000..f5bfb8ff83
--- /dev/null
+++ b/generate_requirements_musa.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# Script to generate requirements-musa.txt from requirements.txt
+# MUSA is not compatible with CUDA packages, so they need to be removed
+# Torch-related packages are pre-installed in the MUSA docker container, so
+# only their version specifiers are stripped. The script takes no arguments.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INPUT_FILE="${SCRIPT_DIR}/requirements.txt"
+OUTPUT_FILE="${SCRIPT_DIR}/requirements-musa.txt"
+
+if [ ! -f "$INPUT_FILE" ]; then
+    echo "Error: requirements.txt not found at $INPUT_FILE" >&2
+    exit 1
+fi
+
+echo "Generating requirements-musa.txt from requirements.txt..."
+
+# Regexes for CUDA-specific packages (not compatible with MUSA). They are
+# matched against the normalized name (lowercase, runs of "-", "_", "."
+# folded to "-"), so "cuda_bindings" and "CUDA-Bindings" are treated alike.
+CUDA_PACKAGES=(
+    "^cupy"            # cupy-cuda12x and similar
+    "^cuda-bindings"   # CUDA bindings
+    "^nixl"            # NIXL (NVIDIA Inter-node eXchange Library)
+    "^flashinfer"      # flashinfer-python (CUDA-specific attention kernel)
+    "^sgl-kernel"      # SGL kernel (CUDA-specific)
+)
+
+# Torch-related packages (pre-installed in MUSA container): exact normalized
+# names whose version specifiers are removed.
+TORCH_PACKAGES=(
+    '^torch$'
+    '^torchvision$'
+)
+
+# Captures the leading package name, e.g. "Torch" in "  Torch >= 2.1".
+NAME_RE='^[[:space:]]*([A-Za-z0-9][A-Za-z0-9._-]*)'
+
+# Names actually removed / unpinned, reported in the final summary.
+removed=""
+unpinned=""
+
+# matches_any NAME REGEX... -> succeeds when NAME matches any REGEX.
+matches_any() {
+    local name="$1" pattern
+    shift
+    for pattern in "$@"; do
+        if [[ "$name" =~ $pattern ]]; then return 0; fi
+    done
+    return 1
+}
+
+# Create the output file with a header comment
+cat > "$OUTPUT_FILE" << 'EOF'
+# Requirements for MUSA (Moore Threads GPU)
+# Auto-generated from requirements.txt by generate_requirements_musa.sh
+# CUDA-specific packages have been removed
+# Torch-related packages have version pins removed (pre-installed in MUSA container)
+
+EOF
+
+# Process the requirements file ("|| [ -n ... ]" keeps an unterminated last line)
+while IFS= read -r line || [ -n "$line" ]; do
+    # Blank lines, comments and pip options ("-r x", "--extra-index-url")
+    # have no leading package name and are copied verbatim. printf, unlike
+    # echo, never mistakes a line such as "-n" for an option.
+    if [[ ! "$line" =~ $NAME_RE ]]; then
+        printf '%s\n' "$line" >> "$OUTPUT_FILE"
+        continue
+    fi
+    raw_name="${BASH_REMATCH[1]}"
+    pkg_name=$(printf '%s' "$raw_name" | tr '[:upper:]' '[:lower:]' | sed -E 's/[-_.]+/-/g')
+
+    if matches_any "$pkg_name" "${CUDA_PACKAGES[@]}"; then
+        # CUDA package: drop the whole line
+        echo " Removing CUDA package: $line"
+        removed="${removed:+$removed, }$raw_name"
+    elif [[ "$line" != "$raw_name" ]] && matches_any "$pkg_name" "${TORCH_PACKAGES[@]}"; then
+        # Torch-related package with a specifier: keep just the package name
+        echo " Unpinning version for: $raw_name (pre-installed in MUSA container)"
+        printf '%s\n' "$raw_name" >> "$OUTPUT_FILE"
+        unpinned="${unpinned:+$unpinned, }$raw_name"
+    else
+        # Keep the package as-is
+        printf '%s\n' "$line" >> "$OUTPUT_FILE"
+    fi
+done < "$INPUT_FILE"
+
+# Add MUSA-specific packages at the end
+cat >> "$OUTPUT_FILE" << 'EOF'
+
+# MUSA-specific packages
+torch_musa
+torchada
+EOF
+
+echo ""
+echo "Successfully generated: $OUTPUT_FILE"
+echo ""
+echo "Summary of changes:"
+echo " - Removed CUDA-specific packages: ${removed:-none}"
+echo " - Unpinned torch-related packages: ${unpinned:-none} (pre-installed in MUSA container)"
+echo " - Added MUSA-specific packages: torch_musa, torchada"