"tools/cfgs/vscode:/vscode.git/clone" did not exist on "4dc18496f09154eb4d4babd9f2dd90f35bffc6bf"
Dockerfile.vllm 5.58 KB
Newer Older
1
2
3
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

4
5
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
6

7
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dev
8
9
10

USER root

11
12
13
14
15
16
17
18
19
20
21
22
23
# Install utilities
RUN apt update -y && apt install -y git wget curl nvtop tmux vim
# nats
RUN wget https://github.com/nats-io/nats-server/releases/download/v2.10.24/nats-server-v2.10.24-amd64.deb && dpkg -i nats-server-v2.10.24-amd64.deb
# etcd
ENV ETCD_VERSION="v3.5.18"
RUN wget https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-amd64.tar.gz -O /tmp/etcd.tar.gz && \
mkdir -p /usr/local/bin/etcd && \
tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1
ENV PATH=/usr/local/bin/etcd/:$PATH


### VIRTUAL ENVIRONMENT SETUP ###
24
25
26
27
28
29
30
31
32
33

# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/triton && \
    uv venv /opt/triton/venv --python 3.12

# Activate virtual environment
ENV VIRTUAL_ENV=/opt/triton/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

34
35
36
37
38
39
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-triton-kv-disagg-patch.patch"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    bash /tmp/deps/vllm/install.sh --patch /tmp/deps/vllm/${VLLM_PATCH} --ref ${VLLM_REF} --install-cmd "uv pip install --editable" --use-precompiled --installation-dir /opt/vllm
40

41
# Install genai-perf for benchmarking
42
43
44
ARG GENAI_PERF_TAG="r25.01"
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"

45
46
47
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
    uv pip install --requirement /tmp/requirements.txt
48

49
50
51
52
### MISC UTILITY SETUP ###

# Finish pyright install
RUN pyright --help > /dev/null 2>&1
53

54
55
56
57
58
# Enable Git operations in the /workspace directory
RUN printf "[safe]\n      directory=/workspace\n" > /root/.gitconfig

RUN ln -sf /bin/bash /bin/sh

59
60
61
62
63
64
65
66
67
### BUILDS ###

# Rust build/dev dependencies
RUN apt update -y && \
    apt install -y \
    build-essential \
    protobuf-compiler && \
    curl https://sh.rustup.rs -sSf | bash -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
68
69
70
71

# Working directory
WORKDIR /workspace

72
COPY runtime/rust /workspace/runtime/rust
73
74
75
RUN cd runtime/rust && \
    cargo build --release --locked && cargo doc --no-deps

76
77
78
79
80
81
82
83
# Build OpenAI HTTP Service binaries
COPY llm/rust /workspace/llm/rust
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
    cargo build --release && \
    cp target/release/http /usr/local/bin/ && \
    cp target/release/llmctl /usr/local/bin/

84
85
86
# TODO: Build tio
# COPY applications/...

87
# Generate C bindings for kv cache routing in vLLM
88
COPY llm/rust /workspace/llm/rust
89
90
91
RUN cd llm/rust/ && \
cargo build --release --locked && cargo doc --no-deps

92
# Build triton_distributed_rs wheel
93
94
COPY python-wheel /workspace/python-wheel
RUN cd python-wheel && \
95
96
97
    uv build && \
    uv pip install dist/triton_distributed_rs*cp312*.whl

98
99
100
101
102
# Package the bindings
RUN mkdir -p /opt/triton/llm_binding/wheels && mkdir /opt/triton/llm_binding/lib
RUN cp python-wheel/dist/triton_distributed_rs*cp312*.whl /opt/triton/llm_binding/wheels/.
RUN cp llm/rust/target/release/libtriton_llm_capi.so /opt/triton/llm_binding/lib/.
RUN cp -r llm/rust/libtriton-llm/include /opt/triton/llm_binding/.
103
104
# Tell vllm to use the Triton LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"
105

106
107
# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace
108

109
110
# FIXME: May want a modification with triton-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
111

112
CMD []
113

114
### Lean Runtime Image Stage ###
115

116
117
118
119
# FIXME: Separate build and runtime images
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime

USER root
120

121
122
123
124
125
126
127
128
# Install tools for interactive convenience
RUN apt update -y && \
    apt install -y curl tmux vim && \
    echo "set -g mouse on" >> /root/.tmux.conf

# Set environment variables
ENV VIRTUAL_ENV=/opt/triton/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
129
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
130
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"
131

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Copy binaries
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx

# Copy venv with installed packages
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}

# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm
COPY python-wheel/python /workspace/python-wheel/python
# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}
# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm /workspace/examples/python_rs/llm/vllm

WORKDIR /workspace

# FIXME: May want a modification with triton-distributed banner on entry
159
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
160
161

CMD []