From 6d41fe8b5b1b7916b2cda74579016fbfad8df052 Mon Sep 17 00:00:00 2001
From: Xiaodong Ye <xiaodong.ye@mthreads.com>
Date: Mon, 12 Jan 2026 14:12:49 +0800
Subject: [PATCH] [MUSA] Add shell script to generate requirements-musa.txt and
 update doc

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
---
 .gitignore                                    |   1 +
 .../source/getting_started/installation.rst   |  12 +-
 .../source/getting_started/installation.rst   |  22 ++--
 generate_requirements_musa.sh                 | 105 ++++++++++++++++++
 4 files changed, 127 insertions(+), 13 deletions(-)
 create mode 100755 generate_requirements_musa.sh

diff --git a/.gitignore b/.gitignore
index 6049c2cdbe..63408699f4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ dist
 .idea
 .vscode
 tmp/
+requirements-musa.txt
diff --git a/docs/CN/source/getting_started/installation.rst b/docs/CN/source/getting_started/installation.rst
index fb998b7567..5fa0e304d2 100755
--- a/docs/CN/source/getting_started/installation.rst
+++ b/docs/CN/source/getting_started/installation.rst
@@ -27,7 +27,7 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
     $ # 前请确保你的docker设置中已经分配了足够的共享内存，否则可能导致
     $ # 服务无法正常启动。
     $ # 1.如果是纯文本服务，建议分配2GB以上的共享内存, 如果你的内存充足，建议分配16GB以上的共享内存.
-    $ # 2.如果是多模态服务，建议分配16GB以上的共享内存，具体可以根据实际情况进行调整. 
+    $ # 2.如果是多模态服务，建议分配16GB以上的共享内存，具体可以根据实际情况进行调整.
     $ # 如果你没有足够的共享内存，可以尝试在启动服务的时候调低 --running_max_req_size 参数，这会降低
     $ # 服务的并发请求数量，但可以减少共享内存的占用。如果是多模态服务，也可以通过降低 --cache_capacity
     $ # 参数来减少共享内存的占用。
@@ -38,7 +38,7 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
 你也可以使用源码手动构建镜像并运行,建议手动构建镜像,因为更新比较频繁：
 
 .. code-block:: console
-    
+
     $ # 进入代码仓库的根目录
     $ cd /lightllm
     $ # 手动构建镜像, docker 目录下有不同功能场景的镜像构建文件，按需构建。
@@ -52,7 +52,7 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
 或者你也可以直接使用脚本一键启动镜像并且运行：
 
 .. code-block:: console
-    
+
     $ # 查看脚本参数
     $ python tools/quick_launch_docker.py --help
 
@@ -80,6 +80,10 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
     $ # 安装lightllm的依赖 (cuda 12.4)
     $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124
     $
+    $ # 安装lightllm的依赖 (摩尔线程 GPU)
+    $ ./generate_requirements_musa.sh
+    $ pip install -r requirements-musa.txt
+    $
     $ # 安装lightllm
     $ python setup.py install
 
@@ -97,6 +101,6 @@ Lightllm 是一个纯python开发的推理框架，其中的算子使用triton
     .. code-block:: console
 
         $ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --no-deps
-    
+
     具体原因可以参考：`issue <https://github.com/triton-lang/triton/issues/3619>`_ 和 `fix PR <https://github.com/triton-lang/triton/pull/3638>`_
 
diff --git a/docs/EN/source/getting_started/installation.rst b/docs/EN/source/getting_started/installation.rst
index 75fa714764..6439c48de3 100755
--- a/docs/EN/source/getting_started/installation.rst
+++ b/docs/EN/source/getting_started/installation.rst
@@ -24,16 +24,16 @@ The easiest way to install Lightllm is using the official image. You can directl
     $ docker pull ghcr.io/modeltc/lightllm:main
     $
     $ # Run，The current LightLLM service relies heavily on shared memory.
-    $ # Before starting, please make sure that you have allocated enough shared memory 
+    $ # Before starting, please make sure that you have allocated enough shared memory
     $ # in your Docker settings; otherwise, the service may fail to start properly.
     $ #
-    $ # 1. For text-only services, it is recommended to allocate more than 2GB of shared memory. 
+    $ # 1. For text-only services, it is recommended to allocate more than 2GB of shared memory.
     $ # If your system has sufficient RAM, allocating 16GB or more is recommended.
-    $ # 2.For multimodal services, it is recommended to allocate 16GB or more of shared memory. 
+    $ # 2.For multimodal services, it is recommended to allocate 16GB or more of shared memory.
     $ # You can adjust this value according to your specific requirements.
     $ #
-    $ # If you do not have enough shared memory available, you can try lowering 
-    $ # the --running_max_req_size parameter when starting the service. 
+    $ # If you do not have enough shared memory available, you can try lowering
+    $ # the --running_max_req_size parameter when starting the service.
     $ # This will reduce the number of concurrent requests, but also decrease shared memory usage.
     $ docker run -it --gpus all -p 8080:8080            \
     $   --shm-size 2g -v your_local_path:/data/         \
@@ -42,13 +42,13 @@ The easiest way to install Lightllm is using the official image. You can directl
 You can also manually build the image from source and run it:
 
 .. code-block:: console
-    
+
     $ # move into lightllm root dir
     $ cd /lightllm
     $ # Manually build the image
     $ docker build -t <image_name> -f ./docker/Dockerfile .
     $
-    $ # Run, 
+    $ # Run,
     $ docker run -it --gpus all -p 8080:8080            \
     $   --shm-size 2g -v your_local_path:/data/         \
     $   <image_name> /bin/bash
@@ -56,7 +56,7 @@ You can also manually build the image from source and run it:
 Or you can directly use the script to launch the image and run it with one click:
 
 .. code-block:: console
-    
+
     $ # View script parameters
     $ python tools/quick_launch_docker.py --help
 
@@ -84,6 +84,10 @@ You can also install Lightllm from source:
     $ # Install Lightllm dependencies (cuda 12.4)
     $ pip install -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu124
     $
+    $ # Install Lightllm dependencies (Moore Threads GPU)
+    $ ./generate_requirements_musa.sh
+    $ pip install -r requirements-musa.txt
+    $
     $ # Install Lightllm
     $ python setup.py install
 
@@ -101,5 +105,5 @@ You can also install Lightllm from source:
     .. code-block:: console
 
         $ pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly --no-deps
-    
+
     For specific reasons, please refer to: `issue <https://github.com/triton-lang/triton/issues/3619>`_ and `fix PR <https://github.com/triton-lang/triton/pull/3638>`_
\ No newline at end of file
diff --git a/generate_requirements_musa.sh b/generate_requirements_musa.sh
new file mode 100755
index 0000000000..f5bfb8ff83
--- /dev/null
+++ b/generate_requirements_musa.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# Generate requirements-musa.txt (next to this script) from requirements.txt.
+# CUDA-specific packages are removed because they are not compatible with MUSA;
+# torch-related packages lose their version pins (pre-installed in the MUSA docker container).
+
+set -e  # abort on the first failing command
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+INPUT_FILE="${SCRIPT_DIR}/requirements.txt"
+OUTPUT_FILE="${SCRIPT_DIR}/requirements-musa.txt"
+
+if [ ! -f "$INPUT_FILE" ]; then
+    echo "Error: requirements.txt not found at $INPUT_FILE" >&2
+    exit 1
+fi
+
+echo "Generating requirements-musa.txt from requirements.txt..."
+
+# Bash regexes matched against the package name; matching requirements are removed
+# as CUDA-specific. [-_] is used because pip treats "-" and "_" in names as equivalent.
+CUDA_PACKAGES=(
+    "^cupy"             # cupy-cuda12x and similar
+    "^cuda[-_]bindings" # CUDA bindings (cuda_bindings / cuda-bindings)
+    "^nixl"             # NIXL (NVIDIA Inter-node eXchange Library)
+    "^flashinfer"       # flashinfer-python (CUDA-specific attention kernel)
+    "^sgl[-_]kernel"    # SGL kernel (sgl-kernel / sgl_kernel, CUDA-specific)
+)
+
+# Bash regexes matched against the whole line; a match keeps the package but drops its "==" pin
+TORCH_PACKAGES=(
+    "^torch=="
+    "^torchvision=="
+)
+
+# (Re)create the output file, overwriting any previous one, with a literal header ('EOF' is quoted: no expansion)
+cat > "$OUTPUT_FILE" << 'EOF'
+# Requirements for MUSA (Moore Threads GPU)
+# Auto-generated from requirements.txt by generate_requirements_musa.sh
+# CUDA-specific packages have been removed
+# Torch-related packages have version pins removed (pre-installed in MUSA container)
+
+EOF
+
+# Filter requirements.txt line by line (the "-n" test also processes a final line lacking a newline)
+while IFS= read -r line || [ -n "$line" ]; do
+    # Copy empty lines and comment lines to the output unchanged
+    if [[ -z "$line" || "$line" =~ ^[[:space:]]*# ]]; then
+        echo "$line" >> "$OUTPUT_FILE"
+        continue
+    fi
+
+    # Extract the package name: the leading run of [a-zA-Z0-9_-] (the line is left as-is if there is none)
+    pkg_name=$(echo "$line" | sed -E 's/^([a-zA-Z0-9_-]+).*/\1/')
+
+    # Drop the requirement if its name matches any CUDA-specific pattern
+    skip=false
+    for pattern in "${CUDA_PACKAGES[@]}"; do
+        if [[ "$pkg_name" =~ $pattern ]]; then
+            echo "  Removing CUDA package: $line"
+            skip=true
+            break
+        fi
+    done
+
+    if $skip; then
+        continue
+    fi
+
+    # Torch-related packages are written to the output without their "==" version pin
+    for pattern in "${TORCH_PACKAGES[@]}"; do
+        if [[ "$line" =~ $pattern ]]; then
+            # Strip everything from the first "==" onwards, keeping just the package name
+            pkg_only=$(echo "$line" | sed -E 's/==.*//')
+            echo "  Unpinning version for: $pkg_only (pre-installed in MUSA container)"
+            echo "$pkg_only" >> "$OUTPUT_FILE"
+            skip=true
+            break
+        fi
+    done
+
+    if $skip; then
+        continue
+    fi
+
+    # Anything else is copied through unchanged
+    echo "$line" >> "$OUTPUT_FILE"
+
+done < "$INPUT_FILE"
+
+# Append the MUSA-specific packages (no version pins) after the filtered requirements
+cat >> "$OUTPUT_FILE" << 'EOF'
+
+# MUSA-specific packages
+torch_musa
+torchada
+EOF
+
+echo ""
+echo "Successfully generated: $OUTPUT_FILE"
+echo ""
+echo "Summary of changes:"  # NOTE: static text, printed even if a listed package was absent from requirements.txt
+echo "  - Removed CUDA-specific packages: cupy-cuda12x, cuda_bindings, nixl, flashinfer-python, sgl-kernel"
+echo "  - Unpinned torch-related packages: torch, torchvision (pre-installed in MUSA container)"
+echo "  - Added MUSA-specific packages: torch_musa, torchada"
+