sglang_runtime.Dockerfile 9.12 KB
Newer Older
1
2
3
4
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
5
# === BEGIN templates/sglang_runtime.Dockerfile ===
6
7
8
9
10
11
##################################
########## Runtime Image #########
##################################

FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime

12
13
14
# NOTE: Unlike vLLM/TRTLLM, the SGLang upstream runtime image already ships with the full CUDA
# toolkit (nvcc, nvlink, ptxas, etc.), so no selective COPY of CUDA binaries is needed here.

15
16
17
18
19
20
# cleanup unnecessary libs (python3-blinker conflicts with pip-installed blinker from Flask/dash)
RUN apt remove -y python3-apt python3-blinker && \
    pip uninstall -y termplotlib

# This ARG is still utilized for SGLANG Version extraction
ARG RUNTIME_IMAGE_TAG
21
ARG ARCH_ALT
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
WORKDIR /workspace

# Install NATS and ETCD
COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server
COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/

ENV PATH=/usr/local/bin/etcd:$PATH

# Create dynamo user with group 0 for OpenShift compatibility
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
    && useradd -m -s /bin/bash -g 0 dynamo \
    && [ `id -u dynamo` -eq 1000 ] \
    && mkdir -p /home/dynamo/.cache /opt/dynamo \
    # Non-recursive chown - only the directories themselves, not contents
    && chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
    # No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
    # Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
    # NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
    && mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh

# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        # required for verification of GPG keys
        gnupg2 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy attribution files
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/

54
{% if context.sglang.enable_media_ffmpeg == "true" %}
55
56
# Copy ffmpeg
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
57
58
59
60
61
62
    mkdir -p /usr/local/lib/pkgconfig && \
    cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/ && \
    cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/ && \
    cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/local/lib/pkgconfig/ && \
    cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/
{% endif %}
63

64
# Copy wheels first (separate from benchmarks to avoid unnecessary cache invalidation)
65
66
67
68
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# NIXL environment and native libraries
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu
ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins

# Copy UCX and NIXL native libraries to system directories
COPY --from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY --chown=dynamo:0 --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
COPY --chown=dynamo:0 --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/

ENV PATH=/usr/local/ucx/bin:$PATH

ENV LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH

88
ENV SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
89
90

{% if target not in ("dev", "local-dev") %}
91
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
92
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
93
94
95
96
97
    export PIP_CACHE_DIR=/root/.cache/pip && \
    pip install --break-system-packages \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
98
99
100
101
102
103
104
105
106
107
108
109
110
111
        sglang==${SGLANG_VERSION}
{% else %}
# Dev/local-dev: skip dynamo wheel install (users build from source via cargo build + maturin develop).
# Install NIXL wheel (pre-built C++ binary, not buildable from source) and sglang.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
    pip install --break-system-packages \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl \
        sglang==${SGLANG_VERSION}
{% endif %}

# Install gpu_memory_service wheel if enabled (all targets)
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
112
    if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then \
113
        export PIP_CACHE_DIR=/root/.cache/pip && \
114
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
115
        if [ -n "$GMS_WHEEL" ]; then pip install --no-cache-dir --break-system-packages "$GMS_WHEEL"; fi; \
116
117
    fi

118
{% if target not in ("dev", "local-dev") %}
119
120
121
# Copy benchmarks after wheel install so benchmarks changes don't invalidate the layer above
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
122
{% endif %}
123

124
# Install common and test dependencies as root
125
126
RUN --mount=type=bind,source=container/deps/requirements.txt,target=/tmp/deps/requirements.txt \
    --mount=type=bind,source=container/deps/requirements.test.txt,target=/tmp/deps/requirements.test.txt \
127
128
129
    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
    pip install --break-system-packages \
130
131
        --requirement /tmp/deps/requirements.txt \
        --requirement /tmp/deps/requirements.test.txt \
132
        sglang==${SGLANG_VERSION} && \
133
134
135
136
137
138
139
    #TODO: Temporary change until upstream sglang runtime image is updated
    pip install --break-system-packages "urllib3>=2.6.3"

{% if target not in ("dev", "local-dev") %}
# Install benchmarks and fix permissions (dev/local-dev install from bind-mounted source if needed)
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
140
141
    cd /workspace/benchmarks && \
    pip install --break-system-packages . && \
142
    chmod -R g+w /workspace/benchmarks
143
{% endif %}
144
145
146
147

# Force-reinstall NVIDIA packages in a separate layer so requirements.txt changes don't trigger re-download
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    export PIP_CACHE_DIR=/root/.cache/pip && \
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
    CUDA_MAJOR=$(nvcc --version | egrep -o 'cuda_[0-9]+' | cut -d_ -f2) && \
    if [ "$CUDA_MAJOR" = "12" ]; then \
        # Install NVIDIA packages that are needed for DeepEP to work properly
        # This is done in the upstream runtime image too, but these packages are overridden in earlier commands
        pip install --break-system-packages --force-reinstall --no-deps \
            nvidia-nccl-cu12==2.28.3 \
            nvidia-cudnn-cu12==9.16.0.29 \
            nvidia-cutlass-dsl==4.3.5; \
    elif [ "$CUDA_MAJOR" = "13" ]; then \
        # CUDA 13: Install CuDNN for PyTorch 2.9.1 compatibility
        pip install --break-system-packages --force-reinstall --no-deps \
            nvidia-nccl-cu13==2.28.3 \
            nvidia-cublas==13.1.0.3 \
            nvidia-cutlass-dsl==4.3.1 \
            nvidia-cudnn-cu13==9.16.0.29; \
    fi

# Switch back to dynamo user after package installations
USER dynamo

# Copy tests, deploy and components for CI with correct ownership
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY --chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY --chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY --chmod=775 --chown=dynamo:0 deploy /workspace/deploy
COPY --chmod=775 --chown=dynamo:0 components/ /workspace/components/
COPY --chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/

# Enable forceful shutdown of inflight requests
ENV SGLANG_FORCE_SHUTDOWN=1

# Setup launch banner in common directory accessible to all users
RUN --mount=type=bind,source=./container/launch_message/runtime.txt,target=/opt/dynamo/launch_message.txt \
    sed '/^#\s/d' /opt/dynamo/launch_message.txt > /opt/dynamo/.launch_screen

# Our scripting assumes /workspace is where dynamo is located
# In order to maintain the ability to have sglang and dynamo
# in the same workspace, symlink /workspace to /sgl-workspace/dynamo
USER root

# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
RUN chmod 755 /opt/dynamo/.launch_screen && \
    echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc && \
    ln -s /workspace /sgl-workspace/dynamo

USER dynamo
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA}

ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []