sglang_runtime.Dockerfile 9.39 KB
Newer Older
1
2
3
4
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
5
# === BEGIN templates/sglang_runtime.Dockerfile ===
6
7
8
9
10
11
##################################
########## Runtime Image #########
##################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

12
13
14
# NOTE: Unlike vLLM/TRTLLM, the SGLang upstream runtime image already ships with the full CUDA
# toolkit (nvcc, nvlink, ptxas, etc.), so no selective COPY of CUDA binaries is needed here.

15
16
17
18
19
20
# cleanup unnecessary libs (python3-blinker conflicts with pip-installed blinker from Flask/dash)
RUN apt remove -y python3-apt python3-blinker && \
    pip uninstall -y termplotlib

# This ARG is still utilized for SGLANG Version extraction
ARG RUNTIME_IMAGE_TAG
21
ARG TARGETARCH
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
WORKDIR /workspace

# Install NATS and ETCD
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/

ENV PATH=/usr/local/bin/etcd:$PATH

# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ `id -u dynamo` -eq 1000 ] \
    && mkdir -p /home/dynamo/.cache /opt/dynamo \
    # Non-recursive chown - only the directories themselves, not contents
    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh

# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # required for verification of GPG keys
        gnupg2 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy attribution files
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/

54
{% if context.sglang.enable_media_ffmpeg == "true" %}
55
56
# Copy ffmpeg
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
57
58
59
60
61
62
    mkdir -p /usr/local/lib/pkgconfig && \
    cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
    cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
{% endif %}
63

64
# Copy wheels first (separate from benchmarks to avoid unnecessary cache invalidation)
65
66
67
68
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

69
70
# NIXL environment and native libraries
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
71
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib64
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Copy UCX and NIXL native libraries to system directories
COPY --from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo:0 --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX

ENV PATH=/usr/local/ucx/bin:$PATH

ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH

87
ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
88
89

{% if target not in ("dev", "local-dev") %}
90
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
91
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
92
93
94
95
96
    export PIP_CACHE_DIR=/root/.cache/pip && \
    pip install --break-system-packages \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
97
98
99
100
101
102
103
104
105
106
107
108
109
110
        sglang==${SGLANG_VERSION}
{% else %}
# Dev/local-dev: skip dynamo wheel install (users build from source via cargo build + maturin develop).
# Install NIXL wheel (pre-built C++ binary, not buildable from source) and sglang.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
    pip install --break-system-packages \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
        sglang==${SGLANG_VERSION}
{% endif %}

# Install gpu_memory_service wheel if enabled (all targets)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
111
    if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
112
        export PIP_CACHE_DIR=/root/.cache/pip && \
113
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
114
        if [ -n "$GMS_WHEEL" ]; then pip install --no-cache-dir --break-system-packages "$GMS_WHEEL"; fi; \
115
116
    fi

117
{% if target not in ("dev", "local-dev") %}
118
119
120
# Copy benchmarks after wheel install so benchmarks changes don't invalidate the layer above
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
121
{% endif %}
122

123
# Install runtime dependencies (common + benchmarks) as root.
124
125
126
# Test and dev dependencies are NOT installed here — they go in the test and dev images.
RUN --mount=type=bind,source=container/deps/requirements.common.txt,target=/tmp/deps/requirements.common.txt \
    --mount=type=bind,source=container/deps/requirements.benchmark.txt,target=/tmp/deps/requirements.benchmark.txt \
127
128
129
    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
    pip install --break-system-packages \
130
131
        --requirement /tmp/deps/requirements.common.txt \
        --requirement /tmp/deps/requirements.benchmark.txt \
132
        sglang==${SGLANG_VERSION} && \
133
134
135
136
137
138
139
    #TODO: Temporary change until upstream sglang runtime image is updated
    pip install --break-system-packages "urllib3>=2.6.3"

{% if target not in ("dev", "local-dev") %}
# Install benchmarks and fix permissions (dev/local-dev install from bind-mounted source if needed)
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
140
141
    cd /workspace/benchmarks && \
    pip install --break-system-packages . && \
142
    chmod -R g+w /workspace/benchmarks
143
{% endif %}
144

145
# Force-reinstall NVIDIA packages in a separate layer so requirements changes don't trigger re-download
146
147
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
    CUDA_MAJOR=$(nvcc --version | egrep -o 'cuda_[0-9]+' | cut -d_ -f2) && \
    if [ "$CUDA_MAJOR" = "12" ]; then \
        # Install NVIDIA packages that are needed for DeepEP to work properly
        # This is done in the upstream runtime image too, but these packages are overridden in earlier commands
        pip install --break-system-packages --force-reinstall --no-deps \
            nvidia-nccl-cu12==2.28.3 \
            nvidia-cudnn-cu12==9.16.0.29 \
            nvidia-cutlass-dsl==4.3.5; \
    elif [ "$CUDA_MAJOR" = "13" ]; then \
        # CUDA 13: Install CuDNN for PyTorch 2.9.1 compatibility
        pip install --break-system-packages --force-reinstall --no-deps \
            nvidia-nccl-cu13==2.28.3 \
            nvidia-cublas==13.1.0.3 \
            nvidia-cutlass-dsl==4.3.1 \
            nvidia-cudnn-cu13==9.16.0.29; \
    fi

# Switch back to dynamo user after package installations
USER dynamo

168
# Copy tests, deploy, and the sglang/common/mocker component subtrees for CI.
169
170
171
172
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY --chmod=775 --chown=dynamo:0 deploy /workspace/deploy
173
174
175
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/common /workspace/components/src/dynamo/common
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/sglang /workspace/components/src/dynamo/sglang
COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/mocker /workspace/components/src/dynamo/mocker
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
COPY --chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/

# Enable forceful shutdown of inflight requests
ENV SGLANG_FORCE_SHUTDOWN=1

# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen

# Our scripting assumes /workspace is where dynamo is located
# In order to maintain the ability to have sglang and dynamo
# in the same workspace, symlink /workspace to /sgl-workspace/dynamo
USER root

# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
RUN chmod 755 /opt/dynamo/.launch_screen && \
    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc && \
    ln -s /workspace /sgl-workspace/dynamo

USER dynamo
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA}

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []