Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2075eb67
Unverified
Commit
2075eb67
authored
Apr 03, 2026
by
Alec
Committed by
GitHub
Apr 03, 2026
Browse files
build(sglang): slim runtime image (#7850)
parent
5bd30719
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing 4 changed files with 14 additions and 122 deletions
+14
-122
components/src/dynamo/common/multimodal/embedding_transfer.py
...onents/src/dynamo/common/multimodal/embedding_transfer.py
+3
-3
container/context.yaml
container/context.yaml
+3
-0
container/templates/args.Dockerfile
container/templates/args.Dockerfile
+1
-1
container/templates/sglang_runtime.Dockerfile
container/templates/sglang_runtime.Dockerfile
+7
-118
No files found.
components/src/dynamo/common/multimodal/embedding_transfer.py
View file @
2075eb67
...
@@ -13,7 +13,7 @@ from abc import ABC, abstractmethod
...
@@ -13,7 +13,7 @@ from abc import ABC, abstractmethod
from queue import Queue
from queue import Queue
from typing import Any, Awaitable, List, Optional
from typing import Any, Awaitable, List, Optional
import msgpack
import msgspec
import
torch
import
torch
from nixl._api import nixl_agent, nixl_agent_config
from nixl._api import nixl_agent, nixl_agent_config
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
...
@@ -522,7 +522,7 @@ class NixlWriteEmbeddingSender(AbstractEmbeddingSender):
...
@@ -522,7 +522,7 @@ class NixlWriteEmbeddingSender(AbstractEmbeddingSender):
(
target_buffer
,
target_byte_size
,
target_device_id
,
target_mem_str
),
(
target_buffer
,
target_byte_size
,
target_device_id
,
target_mem_str
),
write_done_id
,
write_done_id
,
remote_agent_metadata
,
remote_agent_metadata
,
) = msgpack.unpackb(notif)
) = msgspec.msgpack.decode(notif)
write_requests
.
append
(
write_requests
.
append
(
(
(
# receiver contact
# receiver contact
...
@@ -703,7 +703,7 @@ class NixlWriteEmbeddingReceiver(AbstractEmbeddingReceiver):
...
@@ -703,7 +703,7 @@ class NixlWriteEmbeddingReceiver(AbstractEmbeddingReceiver):
# Request for transfer
# Request for transfer
tensor_id
=
self
.
id_counter
.
get_next_id
()
tensor_id
=
self
.
id_counter
.
get_next_id
()
notif_msg = msgpack.packb(
notif_msg = msgspec.msgpack.encode(
(
(
nixl_request
.
tensor_id
,
nixl_request
.
tensor_id
,
(
(
...
...
container/context.yaml
View file @
2075eb67
...
@@ -66,6 +66,7 @@ vllm:
...
@@ -66,6 +66,7 @@ vllm:
flashinf_ref
:
v0.6.6
flashinf_ref
:
v0.6.6
lmcache_ref
:
0.4.2
lmcache_ref
:
0.4.2
vllm_omni_ref
:
"
v0.18.0"
vllm_omni_ref
:
"
v0.18.0"
nixl_ref: 0.10.1
max_jobs
:
"
10"
max_jobs
:
"
10"
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
...
@@ -84,6 +85,7 @@ sglang:
...
@@ -84,6 +85,7 @@ sglang:
runtime_image
:
lmsysorg/sglang
runtime_image
:
lmsysorg/sglang
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag
:
v0.5.9-cu130-runtime
runtime_image_tag
:
v0.5.9-cu130-runtime
nixl_ref: 0.10.0
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_kvbm
:
"
false"
enable_kvbm
:
"
false"
...
@@ -94,6 +96,7 @@ trtllm:
...
@@ -94,6 +96,7 @@ trtllm:
runtime_image
:
nvcr.io/nvidia/cuda-dl-base
runtime_image
:
nvcr.io/nvidia/cuda-dl-base
base_image_tag
:
25.12-py3
base_image_tag
:
25.12-py3
runtime_image_tag
:
25.12-cuda13.1-runtime-ubuntu24.04
runtime_image_tag
:
25.12-cuda13.1-runtime-ubuntu24.04
nixl_ref: 0.10.1
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
false"
enable_gpu_memory_service
:
"
false"
enable_kvbm
:
"
true"
enable_kvbm
:
"
true"
...
...
container/templates/args.Dockerfile
View file @
2075eb67
...
@@ -66,7 +66,7 @@ ARG SCCACHE_REGION=""
...
@@ -66,7 +66,7 @@ ARG SCCACHE_REGION=""
# NIXL configuration
# NIXL configuration
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG NIXL_REF={{ context.dynamo.nixl_ref }}
ARG NIXL_REF={{ context[framework].nixl_ref }}
{% if device == "cuda" %}
{% if device == "cuda" %}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
...
...
container/templates/sglang_runtime.Dockerfile
View file @
2075eb67
...
@@ -9,16 +9,6 @@
...
@@ -9,16 +9,6 @@
FROM
${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
FROM
${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
# NOTE: Unlike vLLM/TRTLLM, the SGLang upstream runtime image already ships with the full CUDA
# toolkit (nvcc, nvlink, ptxas, etc.), so no selective COPY of CUDA binaries is needed here.
# cleanup unnecessary libs (python3-blinker conflicts with pip-installed blinker from Flask/dash)
RUN
apt remove
-y
python3-apt python3-blinker
&&
\
pip uninstall
-y
termplotlib
# This ARG is still utilized for SGLANG Version extraction
ARG
RUNTIME_IMAGE_TAG
ARG
TARGETARCH
WORKDIR
/workspace
WORKDIR
/workspace
# Install NATS and ETCD
# Install NATS and ETCD
...
@@ -39,18 +29,6 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
...
@@ -39,18 +29,6 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN
--mount
=
type
=
cache,target
=
/var/cache/apt,sharing
=
locked
\
apt-get update
&&
\
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--no-install-recommends
\
# required for verification of GPG keys
gnupg2 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Copy attribution files
COPY
--chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
{% if context.sglang.enable_media_ffmpeg == "true" %}
{% if context.sglang.enable_media_ffmpeg == "true" %}
# Copy ffmpeg
# Copy ffmpeg
RUN
--mount
=
type
=
bind
,from
=
wheel_builder,source
=
/usr/local/,target
=
/tmp/usr/local/
\
RUN
--mount
=
type
=
bind
,from
=
wheel_builder,source
=
/usr/local/,target
=
/tmp/usr/local/
\
...
@@ -61,49 +39,16 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
...
@@ -61,49 +39,16 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
cp
-r
/tmp/usr/local/src/ffmpeg /usr/local/src/
cp
-r
/tmp/usr/local/src/ffmpeg /usr/local/src/
{% endif %}
{% endif %}
# Copy wheels first (separate from benchmarks to avoid unnecessary cache invalidation)
{% if target not in ("dev", "local-dev") %}
# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from
# source later in the shared dev stage after the workspace is bind-mounted.
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
# NIXL environment and native libraries
ENV
NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV
NIXL_LIB_DIR=$NIXL_PREFIX/lib64
ENV
NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Copy UCX and NIXL native libraries to system directories
COPY
--from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY
--chown=dynamo:0 --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
ENV
PATH=/usr/local/ucx/bin:$PATH
ENV
LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV
SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
{% if target not in ("dev", "local-dev") %}
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
pip
install
--break-system-packages
\
pip install --break-system-packages --no-deps \
/opt/dynamo/wheelhouse/ai_dynamo_runtime
*
.whl
\
/opt/dynamo/wheelhouse/ai_dynamo_runtime
*
.whl
\
/opt/dynamo/wheelhouse/ai_dynamo
*
any.whl
\
/opt/dynamo/wheelhouse/ai_dynamo*any.whl
/opt/dynamo/wheelhouse/nixl/nixl
*
.whl
\
sglang
==
${
SGLANG_VERSION
}
{% else %}
# Dev/local-dev: skip dynamo wheel install (users build from source via cargo build + maturin develop).
# Install NIXL wheel (pre-built C++ binary, not buildable from source) and sglang.
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
pip
install
--break-system-packages
\
/opt/dynamo/wheelhouse/nixl/nixl
*
.whl
\
sglang
==
${
SGLANG_VERSION
}
{% endif %}
# Install gpu_memory_service wheel if enabled (all targets)
# Install gpu_memory_service wheel if enabled (all targets)
ARG
ENABLE_GPU_MEMORY_SERVICE
ARG
ENABLE_GPU_MEMORY_SERVICE
...
@@ -113,60 +58,9 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
...
@@ -113,60 +58,9 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
GMS_WHEEL
=
$(
ls
/opt/dynamo/wheelhouse/gpu_memory_service
*
.whl 2>/dev/null |
head
-1
)
;
\
GMS_WHEEL
=
$(
ls
/opt/dynamo/wheelhouse/gpu_memory_service
*
.whl 2>/dev/null |
head
-1
)
;
\
if
[
-n
"
$GMS_WHEEL
"
]
;
then
pip
install
--no-cache-dir
--break-system-packages
"
$GMS_WHEEL
"
;
fi
;
\
if
[
-n
"
$GMS_WHEEL
"
]
;
then
pip
install
--no-cache-dir
--break-system-packages
"
$GMS_WHEEL
"
;
fi
;
\
fi
fi
{% if target not in ("dev", "local-dev") %}
# Copy benchmarks after wheel install so benchmarks changes don't invalidate the layer above
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY
--chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
{% endif %}
# Install runtime dependencies (common + benchmarks) as root.
# Test and dev dependencies are NOT installed here — they go in the test and dev images.
RUN
--mount
=
type
=
bind
,source
=
container/deps/requirements.common.txt,target
=
/tmp/deps/requirements.common.txt
\
--mount
=
type
=
bind
,source
=
container/deps/requirements.benchmark.txt,target
=
/tmp/deps/requirements.benchmark.txt
\
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
pip
install
--break-system-packages
\
--requirement
/tmp/deps/requirements.common.txt
\
--requirement
/tmp/deps/requirements.benchmark.txt
\
sglang
==
${
SGLANG_VERSION
}
&&
\
#TODO: Temporary change until upstream sglang runtime image is updated
pip install --break-system-packages "urllib3>=2.6.3"
{% if target not in ("dev", "local-dev") %}
# Install benchmarks and fix permissions (dev/local-dev install from bind-mounted source if needed)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
cd
/workspace/benchmarks
&&
\
pip
install
--break-system-packages
.
&&
\
chmod
-R
g+w /workspace/benchmarks
{% endif %}
{% endif %}
# Force-reinstall NVIDIA packages in a separate layer so requirements changes don't trigger re-download
# Copy tests, deploy and components for CI with correct ownership
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
CUDA_MAJOR
=
$(
nvcc
--version
| egrep
-o
'cuda_[0-9]+'
|
cut
-d_
-f2
)
&&
\
if
[
"
$CUDA_MAJOR
"
=
"12"
]
;
then
\
# Install NVIDIA packages that are needed for DeepEP to work properly
# This is done in the upstream runtime image too, but these packages are overridden in earlier commands
pip install --break-system-packages --force-reinstall --no-deps \
nvidia-nccl-cu12==2.28.3 \
nvidia-cudnn-cu12==9.16.0.29 \
nvidia-cutlass-dsl==4.3.5; \
elif [ "$CUDA_MAJOR" = "13" ]; then \
# CUDA 13: Install CuDNN for PyTorch 2.9.1 compatibility
pip install --break-system-packages --force-reinstall --no-deps \
nvidia-nccl-cu13==2.28.3 \
nvidia-cublas==13.1.0.3 \
nvidia-cutlass-dsl==4.3.1 \
nvidia-cudnn-cu13==9.16.0.29; \
fi
# Switch back to dynamo user after package installations
USER
dynamo
# Copy tests, deploy, and the sglang/common/mocker component subtrees for CI.
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY
--chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY
--chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY
--chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY
--chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY
--chmod=775 --chown=dynamo:0 deploy /workspace/deploy
COPY
--chmod=775 --chown=dynamo:0 deploy /workspace/deploy
...
@@ -174,6 +68,7 @@ COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/common /workspace/compon
...
@@ -174,6 +68,7 @@ COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/common /workspace/compon
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/sglang /workspace/components/src/dynamo/sglang
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/sglang /workspace/components/src/dynamo/sglang
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/mocker /workspace/components/src/dynamo/mocker
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/mocker /workspace/components/src/dynamo/mocker
COPY
--chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/
COPY
--chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/
COPY --chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
# Enable forceful shutdown of inflight requests
# Enable forceful shutdown of inflight requests
ENV
SGLANG_FORCE_SHUTDOWN=1
ENV
SGLANG_FORCE_SHUTDOWN=1
...
@@ -182,12 +77,6 @@ ENV SGLANG_FORCE_SHUTDOWN=1
...
@@ -182,12 +77,6 @@ ENV SGLANG_FORCE_SHUTDOWN=1
RUN
--mount
=
type
=
bind
,source
=
./container/launch_message/runtime.txt,target
=
/opt/dynamo/launch_message.txt
\
RUN
--mount
=
type
=
bind
,source
=
./container/launch_message/runtime.txt,target
=
/opt/dynamo/launch_message.txt
\
sed
'/^#\s/d'
/opt/dynamo/launch_message.txt
>
/opt/dynamo/.launch_screen
sed
'/^#\s/d'
/opt/dynamo/launch_message.txt
>
/opt/dynamo/.launch_screen
# Our scripting assumes /workspace is where dynamo is located
# In order to maintain the ability to have sglang and dynamo
# in the same workspace, symlink /workspace to /sgl-workspace/dynamo
USER
root
# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
RUN
chmod
755 /opt/dynamo/.launch_screen
&&
\
RUN
chmod
755 /opt/dynamo/.launch_screen
&&
\
echo
'cat /opt/dynamo/.launch_screen'
>>
/etc/bash.bashrc
&&
\
echo
'cat /opt/dynamo/.launch_screen'
>>
/etc/bash.bashrc
&&
\
ln
-s
/workspace /sgl-workspace/dynamo
ln
-s
/workspace /sgl-workspace/dynamo
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment