# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Dockerfile for using local/pre-built vLLM images with Dynamo
# Based on container/Dockerfile.vllm but uses existing vLLM image instead of building from source

# All ARGs used in FROM statements must be declared before any FROM
# LOCAL_VLLM_IMAGE:   pre-built vLLM image that files are copied out of
# DYNAMO_BASE_IMAGE:  Dynamo image that supplies NATS, etcd, UCX, NIXL, wheels and the Rust toolchain
# RUNTIME_IMAGE/TAG:  base image of the final runtime stage
ARG LOCAL_VLLM_IMAGE="vllm-elastic-ep:latest_all2all_buffer_input"
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"

# Other build arguments
# Global ARGs are only visible to FROM lines; a stage that needs one must
# redeclare it with a bare `ARG NAME` (the runtime stage does so for
# PYTHON_VERSION and ARCH_ALT).
ARG PYTHON_VERSION=3.12
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

# Use local vLLM image as source
# This stage runs no instructions of its own; it exists only so later
# `COPY --from=vllm_source` instructions can pull files out of the image
# (CUDA compiler tools, /vllm-workspace and the Python dist-packages).
FROM ${LOCAL_VLLM_IMAGE} AS vllm_source

# Use Dynamo base image
# Likewise only a copy source: nats-server, etcd, UCX, NIXL, the Dynamo
# wheelhouse and the Rust toolchain are taken from it further down.
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

##################################################
########## Runtime Image ########################
##################################################
# Final runtime stage, built on ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG}
# (both are global ARGs declared before the first FROM).
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

WORKDIR /workspace
ENV DYNAMO_HOME=/opt/dynamo
# Putting the venv's bin directory first on PATH makes its executables win
# over system ones for every later instruction in this stage.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Bring the global ARG defaults into this stage's scope.
ARG ARCH_ALT
ARG PYTHON_VERSION
# Each NIXL path is set in its own ENV instruction because it expands the
# variable defined by the instruction before it.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Install Python, build-essential and runtime dependencies
# update + install + list cleanup stay in one RUN so the apt index is never
# cached in its own layer. Comment lines inside the continuation are skipped
# by the Dockerfile parser and do not reach the shell.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # Python runtime - CRITICAL for virtual environment to work
        python${PYTHON_VERSION}-dev \
        build-essential \
        # jq and curl for polling various endpoints and health checks
        jq \
        git \
        curl \
        # Libraries required by UCX to find RDMA devices
        libibverbs1 rdma-core ibverbs-utils libibumad3 \
        libnuma1 librdmacm1 ibverbs-providers \
        # JIT Kernel Compilation, flashinfer
        ninja-build \
        g++ \
        # prometheus dependencies
        ca-certificates \
        # DeepGemm uses 'cuobjdump' which does not come with CUDA image
        cuda-command-line-tools-12-9 && \
    rm -rf /var/lib/apt/lists/*

# Copy CUDA development tools from vLLM image (for JIT compilation)
# Only selected pieces are taken, each to the same path it has in the vLLM
# image: the nvcc, cudafe++, ptxas and fatbinary binaries, the CUDA include
# tree, the nvvm directory, and the libcudart shared objects.
# NOTE(review): assumes the vLLM image has a CUDA toolkit under
# /usr/local/cuda whose version matches the runtime base image - confirm.
COPY --from=vllm_source /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc
COPY --from=vllm_source /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++
COPY --from=vllm_source /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas
COPY --from=vllm_source /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary
COPY --from=vllm_source /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=vllm_source /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=vllm_source /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/

### COPY NATS & ETCD ###
# Both are taken prebuilt from the dynamo_base stage rather than installed here.
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
# Add ETCD and CUDA binaries to PATH
# (the CUDA entry added here is the nvvm/bin directory copied from the vLLM image)
ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH

# Copy UCX and NIXL from dynamo base
# $NIXL_PREFIX is the ENV set earlier in this stage, so NIXL lands at the same
# path it has in dynamo_base.
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX
ENV PATH=/usr/local/ucx/bin:$PATH

### VIRTUAL ENVIRONMENT SETUP ###

# Copy uv directly from official image (like Dockerfile.vllm does)
# The uv and uvx binaries are placed in /bin so they are on PATH for the
# RUN instructions that follow.
# TODO: Pin uv image to a specific version tag for reproducibility (e.g. ghcr.io/astral-sh/uv:0.10.7)
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Create fresh virtual environment (following Dockerfile.vllm pattern)
# The path must match VIRTUAL_ENV, and --python selects the interpreter
# version given by the PYTHON_VERSION build argument.
RUN mkdir -p /opt/dynamo/venv && \
    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}

# Activate virtual environment
# NOTE(review): VIRTUAL_ENV and PATH are already set to these values at the
# top of this stage, so this repeats them and prepends the venv bin directory
# to PATH a second time. Harmless, but redundant.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Copy vLLM installation from local image
# vLLM workspace is at /vllm-workspace in the image
COPY --from=vllm_source /vllm-workspace /opt/vllm

# Copy ALL Python packages from vLLM image directly to venv
# Since vLLM is already installed (not as wheels), we copy the site-packages
# Both sides of the copy are keyed on PYTHON_VERSION: the packages are only
# usable by the venv interpreter if they came from the same Python
# minor version, so the source path must not be hard-coded separately.
COPY --from=vllm_source /usr/local/lib/python${PYTHON_VERSION}/dist-packages ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages

# Fix the .pth files to point to the correct location for pplx_kernels and DeepEP
# The vLLM workspace was copied from /vllm-workspace to /opt/vllm, so editable
# installs that recorded the old absolute path must be rewritten.
# The command must end on the `find` line: a trailing `&& \` here would pull
# the next Dockerfile instruction into this shell command.
RUN if [ -f ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/__editable__.pplx_kernels-0.0.1.pth ]; then \
        echo "/opt/vllm/ep_kernels_workspace/pplx-kernels/src" > ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/__editable__.pplx_kernels-0.0.1.pth; \
    fi && \
    # Fix ALL editable install paths (DeepEP .pth files and finder modules for both DeepEP and vLLM)
    find ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages \
        \( -name "__editable__.deep_ep*.pth" -o \
           -name "__editable___deep_ep*_finder.py" -o \
           -name "__editable___vllm*_finder.py" \) \
        -exec sed -i "s|/vllm-workspace|/opt/vllm|g" {} \;

# Set LD_LIBRARY_PATH for all components
# Search order: NVSHMEM from the copied vLLM workspace, then the NIXL library
# and plugin directories (ENV values defined earlier in this stage), then UCX.
# The inherited $LD_LIBRARY_PATH is appended last so existing entries are kept.
# The continuation lines start at column 0 so no whitespace ends up in the value.
ENV LD_LIBRARY_PATH=\
/opt/vllm/ep_kernels_workspace/nvshmem_install/lib:\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH

# DeepGemm JIT compilation support
# CPATH points at the CUDA include tree copied from the vLLM image.
ENV CPATH=/usr/local/cuda/include

# Install Dynamo and dependencies (following Dockerfile.vllm pattern)
# First install basic Python packages
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install pip setuptools wheel

# Install Dynamo wheels from dynamo_base
# The wheelhouse is bind-mounted from the dynamo_base stage for the duration
# of this RUN instead of being COPY'd in and deleted afterwards: a file removed
# in a later layer still occupies space in the layer that added it.
# The mount is read-only by default, which is enough for installing from it.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=dynamo_base,source=/opt/dynamo/wheelhouse,target=/opt/dynamo/wheelhouse \
    uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl

# Install runtime dependencies (common + vllm-specific + benchmarks) and test dependencies
# The requirement files are copied on their own, before the full workspace
# copy, so this layer is rebuilt only when one of them changes.
COPY container/deps/requirements.common.txt /tmp/requirements.common.txt
COPY container/deps/requirements.vllm.txt /tmp/requirements.vllm.txt
COPY container/deps/requirements.benchmark.txt /tmp/requirements.benchmark.txt
COPY container/deps/requirements.test.txt /tmp/requirements.test.txt
# All four files are resolved in a single `uv pip install` call, then the
# temporary copies are removed from /tmp.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install \
        --requirement /tmp/requirements.common.txt \
        --requirement /tmp/requirements.vllm.txt \
        --requirement /tmp/requirements.benchmark.txt \
        --requirement /tmp/requirements.test.txt && \
    rm /tmp/requirements*.txt

# Copy workspace files
# Copies the build context into /workspace; this comes after the dependency
# layers so source edits do not invalidate them.
COPY . /workspace/

# Copy attribution files
# NOTE(review): these look already covered by the `COPY .` above unless a
# .dockerignore excludes them - confirm whether this instruction is needed.
COPY ATTRIBUTION* LICENSE /workspace/

# Setup entrypoint
# Exec form, with an empty default CMD so `docker run` arguments are passed
# straight to the entrypoint script.
# NOTE(review): the script is not created in this file; presumably it is
# provided by the runtime base image - verify.
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

###########################################################
########## Development Image ##############################
###########################################################
# The dev stage layers interactive and build tooling on top of the runtime stage.
FROM runtime AS dev

# Install development tools
# DEBIAN_FRONTEND is set inline (not via ENV) so package configuration cannot
# prompt during the build, matching the apt invocation in the runtime stage,
# without leaking the setting into the running container.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    nvtop \
    wget \
    tmux \
    vim \
    openssh-client \
    iproute2 \
    rsync \
    zip \
    unzip \
    htop \
    autoconf \
    automake \
    cmake \
    libtool \
    meson \
    net-tools \
    pybind11-dev \
    clang \
    libclang-dev \
    libfontconfig-dev \
    protobuf-compiler && \
    rm -rf /var/lib/apt/lists/*

# Set workspace directory
# In the dev stage DYNAMO_HOME is overridden to /workspace (the runtime stage
# sets it to /opt/dynamo). RUSTUP_HOME and CARGO_HOME point at the toolchain
# directories copied below, and cargo's bin directory is prepended to PATH.
ENV WORKSPACE_DIR=/workspace \
    DYNAMO_HOME=/workspace \
    RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:$PATH

# Copy Rust toolchain if needed
# Taken prebuilt from dynamo_base into the same paths the ENV above refers to.
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo

# Install maturin for development
# The requirement is quoted because `[patchelf]` is a shell glob pattern; left
# unquoted it could expand to a matching file name in the working directory.
# The second command installs the copied workspace (the inherited WORKDIR is
# /workspace) in editable mode without re-resolving its dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install "maturin[patchelf]" && \
    uv pip install --no-deps -e .

# Same entrypoint and empty default CMD as the runtime stage, restated here.
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []