Dockerfile.local_vllm 7.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Dockerfile for using local/pre-built vLLM images with Dynamo
# Based on container/Dockerfile.vllm but uses existing vLLM image instead of building from source

# Global build arguments. An ARG has to appear ahead of the first FROM for it
# to be usable inside a FROM instruction.

# Platform / toolchain defaults. A stage that wants one of these values must
# re-declare it with a bare `ARG NAME`.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
ARG PYTHON_VERSION=3.12

# Base of the final runtime stage, given as repository and tag separately
ARG RUNTIME_IMAGE=nvcr.io/nvidia/cuda
ARG RUNTIME_IMAGE_TAG=12.8.1-runtime-ubuntu24.04

# Images that are only used as sources for COPY --from
ARG LOCAL_VLLM_IMAGE=vllm-elastic-ep:latest_all2all_buffer_input
ARG DYNAMO_BASE_IMAGE=dynamo:latest-none

# Stage alias for the local / pre-built vLLM image
FROM ${LOCAL_VLLM_IMAGE} AS vllm_source

# Stage alias for the Dynamo base image
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

##################################################
########## Runtime Image ########################
##################################################
FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

# Bring the global defaults into this stage's scope
ARG PYTHON_VERSION
ARG ARCH_ALT

WORKDIR /workspace

# Dynamo install root and the virtual environment that lives inside it
ENV DYNAMO_HOME=/opt/dynamo \
    VIRTUAL_ENV=/opt/dynamo/venv
# Kept as its own instruction: ${VIRTUAL_ENV} is only expanded once a previous
# instruction has defined it.
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# NIXL locations; each value builds on the one defined just before it, so
# they stay in separate ENV instructions.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=${NIXL_PREFIX}/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=${NIXL_LIB_DIR}/plugins

# OS packages for the runtime image (alphabetical). Why each group is here:
#   build-essential
#   ca-certificates                - prometheus dependencies
#   cuda-command-line-tools-12-8   - DeepGemm uses 'cuobjdump', which does not come with the CUDA image
#   curl, jq                       - polling various endpoints and health checks
#   g++, ninja-build               - JIT kernel compilation, flashinfer
#   git
#   ibverbs-providers, ibverbs-utils, libibumad3, libibverbs1, libnuma1,
#   librdmacm1, rdma-core          - libraries UCX requires to find RDMA devices
#   python${PYTHON_VERSION}-dev    - Python runtime; critical for the virtual environment to work
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cuda-command-line-tools-12-8 \
        curl \
        g++ \
        git \
        ibverbs-providers \
        ibverbs-utils \
        jq \
        libibumad3 \
        libibverbs1 \
        libnuma1 \
        librdmacm1 \
        ninja-build \
        python${PYTHON_VERSION}-dev \
        rdma-core \
 && rm -rf /var/lib/apt/lists/*

# CUDA development pieces used for JIT compilation, taken from the vLLM image.
# nvcc, cudafe++, ptxas and fatbinary share a destination directory, so they
# are copied with a single instruction.
COPY --from=vllm_source \
     /usr/local/cuda/bin/nvcc \
     /usr/local/cuda/bin/cudafe++ \
     /usr/local/cuda/bin/ptxas \
     /usr/local/cuda/bin/fatbinary \
     /usr/local/cuda/bin/
# NVVM tree, headers and the CUDA runtime shared library
COPY --from=vllm_source /usr/local/cuda/nvvm /usr/local/cuda/nvvm
COPY --from=vllm_source /usr/local/cuda/include/ /usr/local/cuda/include/
COPY --from=vllm_source /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/

### NATS, ETCD, UCX and NIXL (all copied from the Dynamo base image) ###
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/
COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx
COPY --from=dynamo_base ${NIXL_PREFIX} ${NIXL_PREFIX}

# One PATH update for everything above. Lookup order: UCX, ETCD, CUDA nvvm,
# then whatever PATH already contained.
ENV PATH=/usr/local/ucx/bin:/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:${PATH}

### VIRTUAL ENVIRONMENT SETUP ###

# The uv and uvx binaries are taken straight from the upstream uv image, the
# same way Dockerfile.vllm does it.
# NOTE(review): the `latest` tag is unpinned - consider pinning a version for
# reproducible builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Build a brand-new virtual environment for the requested Python version
# (Dockerfile.vllm pattern).
RUN mkdir -p /opt/dynamo/venv \
 && uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}

# "Activate" the environment: export VIRTUAL_ENV and put its bin directory at
# the front of PATH.
ENV VIRTUAL_ENV=/opt/dynamo/venv \
    PATH="/opt/dynamo/venv/bin:${PATH}"

# Copy the vLLM installation from the local image.
# The image keeps its workspace at /vllm-workspace; here it lives at /opt/vllm.
COPY --from=vllm_source /vllm-workspace /opt/vllm

# Copy ALL Python packages from the vLLM image directly into the venv.
# vLLM is already installed there (not shipped as wheels), so the whole
# dist-packages tree is copied into site-packages.
# The source path follows PYTHON_VERSION just like the destination does, so
# packages built for one interpreter version are never dropped into a venv of
# a different version; a mismatch fails at this COPY instead.
# With the default PYTHON_VERSION=3.12 the path is unchanged.
COPY --from=vllm_source /usr/local/lib/python${PYTHON_VERSION}/dist-packages ${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages

# Editable-install .pth files copied from the vLLM image still point into
# /vllm-workspace. Repoint them at /opt/vllm:
#   1. pplx_kernels and DeepEP get their .pth rewritten outright (only when
#      the file exists);
#   2. every remaining *.pth has '/vllm-workspace' replaced with '/opt/vllm'.
RUN site_packages="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages" && \
    pplx_pth="${site_packages}/__editable__.pplx_kernels-0.0.1.pth" && \
    deep_ep_pth="${site_packages}/__editable__.deep_ep-0.0.1.pth" && \
    if [ -f "${pplx_pth}" ]; then \
        echo "/opt/vllm/ep_kernels_workspace/pplx-kernels/src" > "${pplx_pth}"; \
    fi && \
    if [ -f "${deep_ep_pth}" ]; then \
        echo "/opt/vllm/ep_kernels_workspace/DeepEP" > "${deep_ep_pth}"; \
    fi && \
    find "${site_packages}" -name "*.pth" -exec sed -i 's|/vllm-workspace|/opt/vllm|g' {} \;

# Search paths for all components, set in one instruction:
#   LD_LIBRARY_PATH - nvshmem_install/lib from the vLLM EP kernels workspace,
#                     the NIXL library and plugin directories, the UCX
#                     libraries, then the value inherited from the base image
#   CPATH           - CUDA headers, for DeepGemm JIT compilation support
ENV LD_LIBRARY_PATH=/opt/vllm/ep_kernels_workspace/nvshmem_install/lib:${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH} \
    CPATH=/usr/local/cuda/include

# Dynamo and its dependencies are installed in steps (Dockerfile.vllm pattern).
# Step one: the basic Python packaging tools.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install \
        pip \
        setuptools \
        wheel

# Install the Dynamo wheels straight out of the dynamo_base stage.
# The wheelhouse is bind-mounted for the duration of this RUN instead of being
# COPY'd and removed afterwards: a COPY layer keeps the wheels in the image
# even if a later layer deletes them, whereas a bind mount never writes them
# into any layer.
# Installed wheels: the ai_dynamo_runtime cp312 wheel, the pure-Python
# ai_dynamo wheel, and the nixl wheel.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=dynamo_base,source=/opt/dynamo/wheelhouse,target=/opt/dynamo/wheelhouse \
    uv pip install \
    /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \
    /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
    /opt/dynamo/wheelhouse/nixl/nixl*.whl

# Install common and test dependencies.
# The requirement files are bind-mounted from the build context at the same
# /tmp paths the install command reads, so they never become part of an image
# layer and no follow-up `rm` is needed. (Deleting COPY'd files in a later
# layer would not have removed them from the earlier one.)
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt

# Bring the build context into the workspace. Destinations are relative to
# the stage's WORKDIR, which is /workspace.
COPY . ./

# Attribution and license files
COPY ATTRIBUTION* LICENSE ./

# Container entrypoint script; no default arguments
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []

###########################################################
########## Development Image ##############################
###########################################################
FROM runtime AS dev

# Install development tools (alphabetical for easier diffs).
# DEBIAN_FRONTEND=noninteractive is set inline, matching the runtime stage's
# package install, so a package that asks a debconf question cannot stall the
# build. It is scoped to this command and not baked into the image env.
RUN apt-get update -y && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    autoconf \
    automake \
    clang \
    cmake \
    htop \
    iproute2 \
    libclang-dev \
    libtool \
    meson \
    net-tools \
    nvtop \
    openssh-client \
    protobuf-compiler \
    pybind11-dev \
    rsync \
    tmux \
    unzip \
    vim \
    wget \
    zip && \
    rm -rf /var/lib/apt/lists/*

# Rust toolchain (rustup and cargo trees) copied from the Dynamo base image
COPY --from=dynamo_base /usr/local/rustup /usr/local/rustup
COPY --from=dynamo_base /usr/local/cargo /usr/local/cargo

# Development environment variables:
#   RUSTUP_HOME / CARGO_HOME    - locations of the toolchain copied above
#   PATH                        - cargo's bin directory is searched first
#   WORKSPACE_DIR / DYNAMO_HOME - both point at /workspace in this stage
#                                 (DYNAMO_HOME overrides the runtime stage's value)
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    PATH=/usr/local/cargo/bin:${PATH} \
    WORKSPACE_DIR=/workspace \
    DYNAMO_HOME=/workspace

# Install maturin for development, then install the project found in the
# current working directory in editable mode without resolving dependencies.
# The requirement "maturin[patchelf]" is quoted: unquoted, the square brackets
# form a shell glob pattern that would be replaced by any matching file name
# in the working directory.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install "maturin[patchelf]" && \
    uv pip install --no-deps -e .

# Same entrypoint as the runtime stage; no default arguments
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []