Dockerfile.vllm 6.03 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

4
5
# Base image coordinates. Declared before the first FROM so they can be used
# in the FROM lines of this file and overridden with --build-arg.
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

# Development/build stage: toolchains, sources and built artifacts live here.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev

# The package installs and writes under /usr/local, /opt and /root below are
# performed as root.
USER root

11
12
13
14
15
16
17
18
19
20
21
22
23
# Install utilities
# apt-get is used instead of apt (apt's CLI is not intended for scripts), and
# the package index is removed in the same layer so it does not persist.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        git \
        nvtop \
        tmux \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

# nats
# Download the nats-server .deb to /tmp, install it, and delete the package
# file in the same layer so it is not kept in the image.
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb -O /tmp/nats-server.deb && \
    dpkg -i /tmp/nats-server.deb && \
    rm -f /tmp/nats-server.deb

# etcd
# The release tarball is unpacked into /usr/local/bin/etcd (a directory), which
# is then put on PATH; the tarball is removed in the same layer.
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
    mkdir -p /usr/local/bin/etcd && \
    tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
    rm -f /tmp/etcd.tar.gz
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###

# Install uv and create virtualenv
# The uv and uvx binaries are copied out of the published uv image into /bin.
# NOTE(review): the image is referenced by the floating `latest` tag, so the uv
# version may differ between builds; consider pinning a release tag or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Create the Python 3.12 virtual environment under /opt/dynamo/venv.
RUN mkdir /opt/dynamo && \
    uv venv /opt/dynamo/venv --python 3.12

# Activate virtual environment
# Putting the venv's bin directory first on PATH makes its interpreter and
# installed tools the default for every following instruction.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

34
35
36
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
# VLLM_REF selects the vllm ref passed to the install script; VLLM_PATCH is the
# patch file name under container/deps/vllm and defaults to a name derived
# from VLLM_REF.
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynamo-kv-disagg-patch.patch"

# container/deps is bind-mounted for this step only, so the install script and
# patch are available to the command without being copied into a layer.
# Variable expansions are quoted so overridden values cannot be word-split.
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    bash /tmp/deps/vllm/install.sh \
        --patch "/tmp/deps/vllm/${VLLM_PATCH}" \
        --ref "${VLLM_REF}" \
        --install-cmd "uv pip install --editable" \
        --use-precompiled \
        --installation-dir /opt/vllm
40

41
# Install genai-perf for benchmarking
# TODO: Move to tag when fix for genai-perf will be released
# Until then genai-perf is installed from the perf_analyzer repository at the
# commit below (genai-perf subdirectory).
ARG GENAI_PERF_TAG="25d0188713adc47868d6b3f22426375237a90529"
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

# Install test dependencies
# The requirements file is bind-mounted for this step only rather than copied
# into the image.
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt
49

50
51
52
53
### MISC UTILITY SETUP ###

# Finish pyright install
# pyright is invoked once at build time with its output discarded; presumably
# this completes its first-run setup inside the image - confirm.
RUN pyright --help > /dev/null 2>&1

# Enable Git operations in the /workspace directory
# Writes a [safe] directory entry for /workspace into root's git config.
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

# Point /bin/sh at bash so that later shell-form RUN steps execute under bash
# (a later step uses the bash builtin `source`).
RUN ln -sf /bin/bash /bin/sh

60
61
62
63
64
65
### BUILDS ###

# Rust build/dev dependencies
# apt-get is used instead of apt, and the package index is removed in the same
# layer. The rustup installer is downloaded to a file and then executed rather
# than piped into bash: with a pipe and no pipefail, a failed download would be
# masked by bash exiting successfully on empty input.
RUN apt-get update -y && \
    apt-get install -y \
        build-essential \
        cmake \
        libssl-dev \
        pkg-config \
        protobuf-compiler && \
    rm -rf /var/lib/apt/lists/* && \
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh && \
    bash /tmp/rustup-init.sh -y && \
    rm -f /tmp/rustup-init.sh

# rustup places cargo/rustc/rustup under /root/.cargo/bin.
ENV PATH="/root/.cargo/bin:${PATH}"

# Install the specific toolchain version in addition to rustup's default.
RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu

# Working directory
# The relative paths used by the build steps below resolve against /workspace.
WORKDIR /workspace

77
78
79
80
81
82
# Copy Python wheel configuration files
COPY pyproject.toml /workspace/
COPY README.md /workspace/
COPY LICENSE /workspace/

# Build Rust runtime
# Only lib/runtime is copied before this build, so changes elsewhere in the
# repository do not invalidate this layer. --locked makes the build fail
# instead of updating Cargo.lock.
COPY lib/runtime /workspace/lib/runtime
RUN cd lib/runtime && \
    cargo build --release --locked && cargo doc --no-deps

# Build OpenAI HTTP Service binaries
# The resulting `http` and `llmctl` executables are copied to /usr/local/bin.
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
    cargo build --release && \
    cp target/release/http /usr/local/bin/ && \
    cp target/release/llmctl /usr/local/bin/

Neelay Shah's avatar
Neelay Shah committed
95
# TODO: Build dynamo-run
# COPY applications/...

# Generate C bindings for kv cache routing in vLLM
# Builds the crate under lib/bindings/c in release mode; a later step copies
# libdynamo_llm_capi.so and the include directory from this build tree.
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c && \
    cargo build --release --locked && cargo doc --no-deps

Neelay Shah's avatar
Neelay Shah committed
103
104
105
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Build dynamo wheel
# Two wheels are built into /workspace/dist and installed into the venv: the
# ai_dynamo wheel from /workspace, then the ai_dynamo_sdk wheel from
# deploy/dynamo/sdk. `source` is a bash builtin; /bin/sh is linked to bash by
# an earlier step in this stage.
RUN source /opt/dynamo/venv/bin/activate && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo*cp312*.whl && \
    cd /workspace/deploy/dynamo/sdk && \
    uv build --wheel --out-dir /workspace/dist && \
    uv pip install /workspace/dist/ai_dynamo_sdk*any.whl

# Package the bindings
# Collects the ai_dynamo wheel, the C API shared library and its headers under
# /opt/dynamo/bindings (wheels/, lib/ and include/ respectively).
RUN mkdir -p /opt/dynamo/bindings/wheels && \
    mkdir /opt/dynamo/bindings/lib && \
    cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
    cp lib/bindings/c/target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
    cp -r lib/bindings/c/include /opt/dynamo/bindings/.
Neelay Shah's avatar
Neelay Shah committed
118

Neelay Shah's avatar
Neelay Shah committed
119
120
# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"

# FIXME: Copy more specific folders in for dev/debug after directory restructure
# The whole build context is copied last so that source changes only
# invalidate this layer and not the builds above.
COPY . /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

# No default arguments are passed to the entrypoint.
CMD []
129

130
### Lean Runtime Image Stage ###

# FIXME: Separate build and runtime images
# Starts again from the same base image; only selected artifacts are copied in
# from the dev stage further below.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime

USER root

# Install tools for interactive convenience
# apt-get is used instead of apt, and the package index is removed in the same
# layer so it is not shipped in the runtime image.
RUN apt-get update -y && \
    apt-get install -y \
        curl \
        tmux \
        vim && \
    rm -rf /var/lib/apt/lists/* && \
    echo "set -g mouse on" >> /root/.tmux.conf

# Set environment variables
# VIRTUAL_ENV and VLLM_KV_CAPI_PATH repeat the values used in the dev stage
# (ENV does not carry across stages) and are also used as COPY paths below.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
147

148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Copy binaries
# In the dev stage /usr/local/bin/etcd is a directory; only the etcd
# executable inside it is copied here, to the file path /usr/local/bin/etcd.
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx

# Copy venv with installed packages
# A Python 3.12 interpreter is installed with uv in this stage before the venv
# is copied; presumably the copied venv refers to a uv-managed interpreter
# that is not part of the venv directory itself - confirm.
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm
# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}
# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm /workspace/examples/python_rs/llm/vllm

WORKDIR /workspace

# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

# No default arguments are passed to the entrypoint.
CMD []