Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,66 @@ COPY --from=builder /app/model-runner /app/model-runner
FROM sglang AS final-sglang
# Copy the built binary from builder-sglang (without vLLM)
COPY --from=builder-sglang /app/model-runner /app/model-runner

# --- vLLM ROCm: builder stage ---
# Builds upstream vLLM from source on AMD's pre-built ROCm dev image, which
# already contains PyTorch ROCm, Triton, flash-attention, and the ROCm SDK
# (see https://hub.docker.com/r/rocm/vllm-dev). vLLM is checked out at the
# tagged release matching VLLM_VERSION — no fork, no custom wheels.
FROM rocm/vllm-dev:base AS vllm-rocm-builder

# NOTE(review): confirm that v${VLLM_VERSION} exists as an upstream tag of
# vllm-project/vllm — the clone below fails hard on an unknown tag.
ARG VLLM_VERSION=0.19.1
# Target GPU architectures officially supported by vLLM ROCm:
# gfx90a (MI200), gfx942 (MI300), gfx1100/1101 (RDNA3 7900/7800).
ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx1100;gfx1101"
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}

RUN git clone --depth 1 --branch v${VLLM_VERSION} \
https://github.com/vllm-project/vllm.git /vllm-src

WORKDIR /vllm-src
# Build the wheel with `pip wheel` instead of the deprecated direct
# `setup.py bdist_wheel` invocation. --no-build-isolation makes the build use
# the system-wide PyTorch ROCm / Triton shipped in the base image rather than
# resolving build requirements (which would fetch CUDA torch) from PyPI;
# --no-deps keeps pip from wheeling vLLM's runtime dependencies as well.
RUN python3 -m pip install --no-cache-dir -r requirements/rocm.txt \
&& python3 -m pip wheel --no-cache-dir --no-deps --no-build-isolation \
    --wheel-dir=/wheels .

# --- vLLM ROCm: runtime stage ---
# Mirrors the /opt/vllm-env layout that pkg/inference/backends/vllm/vllm.go
# expects (binary at /opt/vllm-env/bin/vllm, version file at
# /opt/vllm-env/version). Symlinks are used instead of a real venv because
# rocm/vllm-dev:base installs Python dependencies system-wide and recreating
# a venv would break the PyTorch ROCm / Triton ROCm wiring.
#
# Note: unlike the CUDA vllm stage, this image does NOT include llama.cpp.
# The base image is incompatible (different ROCm runtime versions), and the
# rocm vllm image is intended as a vLLM-only artifact.
FROM rocm/vllm-dev:base AS vllm-rocm

COPY --from=vllm-rocm-builder /wheels/*.whl /tmp/
# --no-deps: the base image already ships PyTorch ROCm, Triton, and vLLM's
# other runtime dependencies; letting pip resolve the wheel's dependency list
# could replace the ROCm torch with a CUDA build from PyPI.
# --force-reinstall: the base image already contains a vllm install, and pip
# would otherwise report "already satisfied" and skip the freshly built wheel
# when the versions coincide.
RUN python3 -m pip install --no-cache-dir --no-deps --force-reinstall /tmp/*.whl \
&& rm /tmp/*.whl

# Dedicated unprivileged service account; membership in `video` is what grants
# access to the AMD GPU device nodes inside the container.
RUN groupadd -r modelrunner \
&& useradd -r -g modelrunner -G video \
    -m -d /home/modelrunner modelrunner

# Lay out /opt/vllm-env the way the Go backend expects: the real vllm binary
# (wherever pip put it) behind a stable symlink, plus a plain-text version file.
RUN mkdir -p /opt/vllm-env/bin \
&& python3 -c "import vllm; print(vllm.__version__)" > /opt/vllm-env/version \
&& ln -s "$(command -v vllm)" /opt/vllm-env/bin/vllm \
&& chown -R modelrunner:modelrunner /opt/vllm-env

# Socket dir, model storage, and app dir — all writable by the service user;
# /models stays world-readable for debugging/inspection.
RUN mkdir -p /var/run/model-runner /models /app \
&& chown -R modelrunner:modelrunner /var/run/model-runner /app /models \
&& chmod -R 755 /models

# Drop root for everything from here on, including the ENTRYPOINT process.
USER modelrunner

# Runtime configuration consumed by model-runner, set in one layer.
ENV MODEL_RUNNER_SOCK=/var/run/model-runner/model-runner.sock \
    MODEL_RUNNER_PORT=12434 \
    HOME=/home/modelrunner \
    MODELS_PATH=/models

LABEL com.docker.desktop.service="model-runner"

ENTRYPOINT ["/app/model-runner"]

# --- vLLM ROCm: final image ---
# Layers the model-runner binary on top of the vllm-rocm runtime stage; the
# `builder` stage it copies from is defined earlier in this Dockerfile
# (outside this hunk) — the same stage used by the other final-* targets.
FROM vllm-rocm AS final-vllm-rocm
# Copy the built binary from builder
COPY --from=builder /app/model-runner /app/model-runner
26 changes: 23 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ LLAMA_UPSTREAM_IMAGE ?= $(shell \
"$(LLAMA_SERVER_VERSION)" "$(LLAMA_SERVER_VARIANT)")
DOCKER_IMAGE := docker/model-runner:latest
DOCKER_IMAGE_VLLM := docker/model-runner:latest-vllm-cuda
DOCKER_IMAGE_VLLM_ROCM := docker/model-runner:latest-vllm-rocm
DOCKER_IMAGE_SGLANG := docker/model-runner:latest-sglang
DOCKER_IMAGE_MUSA := docker/model-runner:latest-musa
DOCKER_IMAGE_OPENVINO := docker/model-runner:latest-openvino
Expand Down Expand Up @@ -43,7 +44,7 @@ DOCKER_BUILD_COMMON_ARGS = \
.PHONY: build build-cli build-dmr build-llamacpp install-cli run clean test integration-tests e2e
.PHONY: validate validate-versions validate-all lint help
.PHONY: docker-build docker-build-multiplatform docker-run docker-run-impl
.PHONY: docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang
.PHONY: docker-build-vllm docker-run-vllm docker-build-vllm-rocm docker-run-vllm-rocm docker-build-sglang docker-run-sglang
.PHONY: docker-build-musa docker-run-musa docker-build-openvino docker-run-openvino
.PHONY: test-docker-ce-installation
.PHONY: vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
Expand Down Expand Up @@ -194,6 +195,23 @@ docker-build-vllm:
docker-run-vllm: docker-build-vllm
@$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM)

# Build vLLM Docker image with ROCm (AMD GPU) support.
# Builds upstream vLLM from source on top of rocm/vllm-dev:base — this is a
# vLLM-only image (no llama.cpp), unlike the CUDA variant. Build is heavy:
# expect 30-60 min and ~12-15 GB final image size.
# LLAMA_SERVER_VARIANT is not consumed by the Dockerfile stages here, but
# setting it to "rocm" restricts DOCKER_BUILD_PLATFORMS to linux/amd64
# (vLLM ROCm has no aarch64 support).
# Delegates to the generic docker-build target with the final-vllm-rocm
# Dockerfile stage and the :latest-vllm-rocm tag.
docker-build-vllm-rocm:
	@$(MAKE) docker-build \
		DOCKER_TARGET=final-vllm-rocm \
		DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM_ROCM) \
		LLAMA_SERVER_VARIANT=rocm

# Run vLLM ROCm Docker container with TCP port access and mounted model storage
# (reuses the shared docker-run-impl recipe; builds the image first).
docker-run-vllm-rocm: docker-build-vllm-rocm
	@$(MAKE) -s docker-run-impl DOCKER_IMAGE=$(DOCKER_IMAGE_VLLM_ROCM)

# Build SGLang Docker image
docker-build-sglang:
@$(MAKE) docker-build \
Expand Down Expand Up @@ -402,8 +420,10 @@ help:
@echo " docker-build - Build Docker image for current platform"
@echo " docker-build-multiplatform - Build Docker image for multiple platforms"
@echo " docker-run - Run in Docker container with TCP port access and mounted model storage"
@echo " docker-build-vllm - Build vLLM Docker image"
@echo " docker-run-vllm - Run vLLM Docker container"
@echo " docker-build-vllm - Build vLLM Docker image (CUDA)"
@echo " docker-run-vllm - Run vLLM Docker container (CUDA)"
@echo " docker-build-vllm-rocm - Build vLLM Docker image (ROCm / AMD GPU, source build)"
@echo " docker-run-vllm-rocm - Run vLLM Docker container (ROCm / AMD GPU)"
@echo " docker-build-sglang - Build SGLang Docker image"
@echo " docker-run-sglang - Run SGLang Docker container"
@echo " docker-build-musa - Build MUSA Docker image"
Expand Down