@@ -11,6 +11,7 @@ ARG CUDA_VERSION=12.4.1
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 AS base
 ARG CUDA_VERSION=12.4.1
 ARG PYTHON_VERSION=3.12
+ARG TARGETPLATFORM
 ENV DEBIAN_FRONTEND=noninteractive
 
 # Install Python and other dependencies
@@ -46,9 +47,14 @@ WORKDIR /workspace
 # install build and runtime dependencies
 COPY requirements-common.txt requirements-common.txt
 COPY requirements-cuda.txt requirements-cuda.txt
+COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install -r requirements-cuda.txt
 
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        python3 -m pip install -r requirements-cuda-arm64.txt; \
+    fi
 
 # cuda arch list used by torch
 # can be useful for both `dev` and `test`
@@ -63,13 +69,19 @@ ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
 
 #################### WHEEL BUILD IMAGE ####################
 FROM base AS build
+ARG TARGETPLATFORM
 
 # install build dependencies
 COPY requirements-build.txt requirements-build.txt
 
 RUN --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install -r requirements-build.txt
 
+RUN --mount=type=cache,target=/root/.cache/pip \
+    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        python3 -m pip install -r requirements-cuda-arm64.txt; \
+    fi
+
 COPY . .
 ARG GIT_REPO_CHECK=0
 RUN --mount=type=bind,source=.git,target=.git \
@@ -134,15 +146,18 @@ COPY requirements-test.txt requirements-test.txt
 COPY requirements-dev.txt requirements-dev.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install -r requirements-dev.txt
-
 #################### DEV IMAGE ####################
+
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
 FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base
 ARG CUDA_VERSION=12.4.1
 ARG PYTHON_VERSION=3.12
 WORKDIR /vllm-workspace
 ENV DEBIAN_FRONTEND=noninteractive
+ARG TARGETPLATFORM
+
+COPY requirements-cuda-arm64.txt requirements-cuda-arm64.txt
 
 RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \
     echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment
@@ -168,18 +183,25 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
 # or future versions of triton.
 RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/
 
-# install vllm wheel first, so that torch etc will be installed
+# Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/pip \
     python3 -m pip install dist/*.whl --verbose
 
 RUN --mount=type=cache,target=/root/.cache/pip \
-    . /etc/environment && \
-    python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl
+    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        pip uninstall -y torch && \
+        python3 -m pip install -r requirements-cuda-arm64.txt; \
+    fi
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    . /etc/environment && \
+    if [ "$TARGETPLATFORM" != "linux/arm64" ]; then \
+        python3 -m pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu121torch2.4-cp${PYTHON_VERSION_STR}-cp${PYTHON_VERSION_STR}-linux_x86_64.whl; \
+    fi
 COPY examples examples
 #################### vLLM installation IMAGE ####################
 
-
 #################### TEST IMAGE ####################
 # image to run unit testing suite
 # note that this uses vllm installed by `pip`
@@ -209,7 +231,6 @@ COPY vllm/v1 /usr/local/lib/python3.12/dist-packages/vllm/v1
 RUN mkdir test_docs
 RUN mv docs test_docs/
 RUN mv vllm test_docs/
-
 #################### TEST IMAGE ####################
 
 #################### OPENAI API SERVER ####################
@@ -218,8 +239,11 @@ FROM vllm-base AS vllm-openai
 
 # install additional dependencies for openai api server
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' timm==0.9.10
-
+    if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
+        pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' ; \
+    else \
+        pip install accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.45.0' 'timm==0.9.10' ; \
+    fi
 ENV VLLM_USAGE_SOURCE production-docker-image
 
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
0 commit comments