Skip to content

Commit b3b1526

Browse files
cenn and drikster80
authored
WIP: [CI/Build] simplify Dockerfile build for ARM64 / GH200 (vllm-project#11212)
Signed-off-by: drikster80 <ed.sealing@gmail.com> Co-authored-by: drikster80 <ed.sealing@gmail.com>
1 parent 17138af commit b3b1526

File tree

5 files changed

+64
-11
lines changed

5 files changed

+64
-11
lines changed

Dockerfile

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ ARG CUDA_VERSION=12.4.1
1111
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
1212
ARG CUDA_VERSION=12.4.1
1313
ARG PYTHON_VERSION=3.12
14+
ARG TARGETPLATFORM
1415
ENV DEBIAN_FRONTEND=noninteractive
1516

1617
# Install Python and other dependencies
@@ -46,9 +47,14 @@ WORKDIR /workspace
4647
# install build and runtime dependencies
4748
COPY requirements-common.txt requirements-common.txt
4849
COPY requirements-cuda.txt requirements-cuda.txt
50+
COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt
4951
RUN --mount=type=cache,target=/root/.cache/pip \
5052
python3 -m pip install -r requirements-cuda.txt
5153

54+
RUN --mount=type=cache,target=/root/.cache/pip \
55+
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
56+
python3 -m pip install -r requirements-cuda-arm64.txt; \
57+
fi
5258

5359
# cuda arch list used by torch
5460
# can be useful for both `dev` and `test`
@@ -63,13 +69,19 @@ ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
6369

6470
#################### WHEEL BUILD IMAGE ####################
6571
FROM base AS build
72+
ARG TARGETPLATFORM
6673

6774
# install build dependencies
6875
COPY requirements-build.txt requirements-build.txt
6976

7077
RUN --mount=type=cache,target=/root/.cache/pip \
7178
python3 -m pip install -r requirements-build.txt
7279

80+
RUN --mount=type=cache,target=/root/.cache/pip \
81+
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
82+
python3 -m pip install -r requirements-cuda-arm64.txt; \
83+
fi
84+
7385
COPY . .
7486
ARG GIT_REPO_CHECK=0
7587
RUN --mount=type=bind,source=.git,target=.git \
@@ -134,15 +146,18 @@ COPY requirements-test.txt requirements-test.txt
134146
COPY requirements-dev.txt requirements-dev.txt
135147
RUN --mount=type=cache,target=/root/.cache/pip \
136148
python3 -m pip install -r requirements-dev.txt
137-
138149
#################### DEV IMAGE ####################
150+
139151
#################### vLLM installation IMAGE ####################
140152
# image with vLLM installed
141153
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
142154
ARG CUDA_VERSION=12.4.1
143155
ARG PYTHON_VERSION=3.12
144156
WORKDIR /vllm-workspace
145157
ENV DEBIAN_FRONTEND=noninteractive
158+
ARG TARGETPLATFORM
159+
160+
COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt
146161

147162
RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
148163
echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
@@ -168,18 +183,25 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
168183
# or future versions of triton.
169184
RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
170185

171-
# install vllm wheel first, so that torch etc will be installed
186+
# Install vllm wheel first, so that torch etc will be installed.
172187
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
173188
--mount=type=cache,target=/root/.cache/pip \
174189
python3 -m pip install dist/*.whl --verbose
175190

176191
RUN --mount=type=cache,target=/root/.cache/pip \
177-
. /etc/environment && \
178-
python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl
192+
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
193+
pip uninstall -y torch && \
194+
python3 -m pip install -r requirements-cuda-arm64.txt; \
195+
fi
196+
197+
RUN --mount=type=cache,target=/root/.cache/pip \
198+
. /etc/environment && \
199+
if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
200+
python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl; \
201+
fi
179202
COPY examples examples
180203
#################### vLLM installation IMAGE ####################
181204

182-
183205
#################### TEST IMAGE ####################
184206
# image to run unit testing suite
185207
# note that this uses vllm installed by `pip`
@@ -209,7 +231,6 @@ COPY vllm/v1 /usr/local/lib/python3.12/dist-packages/vllm/v1
209231
RUN mkdir test_docs
210232
RUN mv docs test_docs/
211233
RUN mv vllm test_docs/
212-
213234
#################### TEST IMAGE ####################
214235

215236
#################### OPENAI API SERVER ####################
@@ -218,8 +239,11 @@ FROM vllm-base AS vllm-openai
218239

219240
# install additional dependencies for openai api server
220241
RUN --mount=type=cache,target=/root/.cache/pip \
221-
pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' timm==0.9.10
222-
242+
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
243+
pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10'; \
244+
else \
245+
pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' 'timm==0.9.10'; \
246+
fi
223247
ENV VLLM_USAGE_SOURCE production-docker-image
224248

225249
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]

docs/source/serving/deploying_with_docker.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,32 @@ You can build and run vLLM from source via the provided `Dockerfile <https://git
3737
current GPU type the machine is running on, you can add the argument ``--build-arg torch_cuda_arch_list=""``
3838
for vLLM to find the current GPU type and build for that.
3939

40+
Building for Arm64/aarch64
41+
--------------------------
42+
43+
A docker container can be built for aarch64 systems such as the Nvidia Grace-Hopper. At the time of this writing, this requires the use
of PyTorch Nightly and should be considered **experimental**. Using the flag `--platform "linux/arm64"` will attempt to build for arm64.
45+
46+
.. note::
47+
48+
Multiple modules must be compiled, so this process can take a while. We recommend using the `--build-arg max_jobs=` and `--build-arg nvcc_threads=`
flags to speed up the build process. However, ensure your `max_jobs` is substantially larger than `nvcc_threads` to get the most benefit.
Keep an eye on memory usage with parallel jobs, as it can be substantial (see the example below).
50+
Keep an eye on memory usage with parallel jobs as it can be substantial (see example below).
51+
52+
.. code-block:: console
53+
54+
# Example of building on Nvidia GH200 server. (Memory usage: ~12GB, Build time: ~1475s / ~25 min, Image size: 7.26GB)
55+
$ DOCKER_BUILDKIT=1 sudo docker build . \
56+
--target vllm-openai \
57+
--platform "linux/arm64" \
58+
-t vllm/vllm-gh200-openai:latest \
59+
--build-arg max_jobs=66 \
60+
--build-arg nvcc_threads=2 \
61+
--build-arg torch_cuda_arch_list="9.0+PTX" \
62+
--build-arg vllm_fa_cmake_gpu_arches="90-real"
63+
64+
65+
4066
4167
To run vLLM:
4268

requirements-build.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ ninja
44
packaging
55
setuptools>=61
66
setuptools-scm>=8
7-
torch==2.5.1
7+
torch==2.5.1; platform_machine != 'aarch64'
88
wheel
99
jinja2

requirements-cuda-arm64.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
--index-url https://download.pytorch.org/whl/nightly/cu124
2+
torchvision==0.22.0.dev20241215; platform_machine == 'aarch64'
3+
torch==2.6.0.dev20241210+cu124; platform_machine == 'aarch64'

requirements-cuda.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Dependencies for NVIDIA GPUs
55
ray >= 2.9
66
nvidia-ml-py >= 12.560.30 # for pynvml package
7-
torch == 2.5.1
7+
torch == 2.5.1; platform_machine != 'aarch64'
88
# These must be updated alongside torch
9-
torchvision == 0.20.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
9+
torchvision == 0.20.1; platform_machine != 'aarch64' # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
1010
xformers == 0.0.28.post3; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch 2.5.1

0 commit comments

Comments
 (0)