Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2075eb67
"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "5135c321b06888cbe2708fa0a601d62165269607"
Unverified
Commit
2075eb67
authored
Apr 03, 2026
by
Alec
Committed by
GitHub
Apr 03, 2026
Browse files
build(sglang): slim runtime image (#7850)
parent
5bd30719
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
122 deletions
+14
-122
components/src/dynamo/common/multimodal/embedding_transfer.py
...onents/src/dynamo/common/multimodal/embedding_transfer.py
+3
-3
container/context.yaml
container/context.yaml
+3
-0
container/templates/args.Dockerfile
container/templates/args.Dockerfile
+1
-1
container/templates/sglang_runtime.Dockerfile
container/templates/sglang_runtime.Dockerfile
+7
-118
No files found.
components/src/dynamo/common/multimodal/embedding_transfer.py
View file @
2075eb67
...
@@ -13,7 +13,7 @@ from abc import ABC, abstractmethod
...
@@ -13,7 +13,7 @@ from abc import ABC, abstractmethod
from
queue
import
Queue
from
queue
import
Queue
from
typing
import
Any
,
Awaitable
,
List
,
Optional
from
typing
import
Any
,
Awaitable
,
List
,
Optional
import
msg
pack
import
msg
spec
import
torch
import
torch
from
nixl._api
import
nixl_agent
,
nixl_agent_config
from
nixl._api
import
nixl_agent
,
nixl_agent_config
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
...
@@ -522,7 +522,7 @@ class NixlWriteEmbeddingSender(AbstractEmbeddingSender):
...
@@ -522,7 +522,7 @@ class NixlWriteEmbeddingSender(AbstractEmbeddingSender):
(
target_buffer
,
target_byte_size
,
target_device_id
,
target_mem_str
),
(
target_buffer
,
target_byte_size
,
target_device_id
,
target_mem_str
),
write_done_id
,
write_done_id
,
remote_agent_metadata
,
remote_agent_metadata
,
)
=
msg
pack
.
unpackb
(
notif
)
)
=
msg
spec
.
msgpack
.
decode
(
notif
)
write_requests
.
append
(
write_requests
.
append
(
(
(
# receiver contact
# receiver contact
...
@@ -703,7 +703,7 @@ class NixlWriteEmbeddingReceiver(AbstractEmbeddingReceiver):
...
@@ -703,7 +703,7 @@ class NixlWriteEmbeddingReceiver(AbstractEmbeddingReceiver):
# Request for transfer
# Request for transfer
tensor_id
=
self
.
id_counter
.
get_next_id
()
tensor_id
=
self
.
id_counter
.
get_next_id
()
notif_msg
=
msg
pack
.
packb
(
notif_msg
=
msg
spec
.
msgpack
.
encode
(
(
(
nixl_request
.
tensor_id
,
nixl_request
.
tensor_id
,
(
(
...
...
container/context.yaml
View file @
2075eb67
...
@@ -66,6 +66,7 @@ vllm:
...
@@ -66,6 +66,7 @@ vllm:
flashinf_ref
:
v0.6.6
flashinf_ref
:
v0.6.6
lmcache_ref
:
0.4.2
lmcache_ref
:
0.4.2
vllm_omni_ref
:
"
v0.18.0"
vllm_omni_ref
:
"
v0.18.0"
nixl_ref
:
0.10.1
max_jobs
:
"
10"
max_jobs
:
"
10"
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
...
@@ -84,6 +85,7 @@ sglang:
...
@@ -84,6 +85,7 @@ sglang:
runtime_image
:
lmsysorg/sglang
runtime_image
:
lmsysorg/sglang
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag
:
v0.5.9-cu130-runtime
runtime_image_tag
:
v0.5.9-cu130-runtime
nixl_ref
:
0.10.0
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_kvbm
:
"
false"
enable_kvbm
:
"
false"
...
@@ -94,6 +96,7 @@ trtllm:
...
@@ -94,6 +96,7 @@ trtllm:
runtime_image
:
nvcr.io/nvidia/cuda-dl-base
runtime_image
:
nvcr.io/nvidia/cuda-dl-base
base_image_tag
:
25.12-py3
base_image_tag
:
25.12-py3
runtime_image_tag
:
25.12-cuda13.1-runtime-ubuntu24.04
runtime_image_tag
:
25.12-cuda13.1-runtime-ubuntu24.04
nixl_ref
:
0.10.1
enable_media_ffmpeg
:
"
false"
enable_media_ffmpeg
:
"
false"
enable_gpu_memory_service
:
"
false"
enable_gpu_memory_service
:
"
false"
enable_kvbm
:
"
true"
enable_kvbm
:
"
true"
...
...
container/templates/args.Dockerfile
View file @
2075eb67
...
@@ -66,7 +66,7 @@ ARG SCCACHE_REGION=""
...
@@ -66,7 +66,7 @@ ARG SCCACHE_REGION=""
# NIXL configuration
# NIXL configuration
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG
NIXL_REF={{ context
.dynamo
.nixl_ref }}
ARG
NIXL_REF={{ context
[framework]
.nixl_ref }}
{% if device == "cuda" %}
{% if device == "cuda" %}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
...
...
container/templates/sglang_runtime.Dockerfile
View file @
2075eb67
...
@@ -9,16 +9,6 @@
...
@@ -9,16 +9,6 @@
FROM
${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
FROM
${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime
# NOTE: Unlike vLLM/TRTLLM, the SGLang upstream runtime image already ships with the full CUDA
# toolkit (nvcc, nvlink, ptxas, etc.), so no selective COPY of CUDA binaries is needed here.
# cleanup unnecessary libs (python3-blinker conflicts with pip-installed blinker from Flask/dash)
RUN
apt remove
-y
python3-apt python3-blinker
&&
\
pip uninstall
-y
termplotlib
# This ARG is still utilized for SGLANG Version extraction
ARG
RUNTIME_IMAGE_TAG
ARG
TARGETARCH
WORKDIR
/workspace
WORKDIR
/workspace
# Install NATS and ETCD
# Install NATS and ETCD
...
@@ -39,18 +29,6 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
...
@@ -39,18 +29,6 @@ RUN userdel -r ubuntu > /dev/null 2>&1 || true \
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN
--mount
=
type
=
cache,target
=
/var/cache/apt,sharing
=
locked
\
apt-get update
&&
\
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--no-install-recommends
\
# required for verification of GPG keys
gnupg2 \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Copy attribution files
COPY
--chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
{% if context.sglang.enable_media_ffmpeg == "true" %}
{% if context.sglang.enable_media_ffmpeg == "true" %}
# Copy ffmpeg
# Copy ffmpeg
RUN
--mount
=
type
=
bind
,from
=
wheel_builder,source
=
/usr/local/,target
=
/tmp/usr/local/
\
RUN
--mount
=
type
=
bind
,from
=
wheel_builder,source
=
/usr/local/,target
=
/tmp/usr/local/
\
...
@@ -61,49 +39,16 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
...
@@ -61,49 +39,16 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
cp
-r
/tmp/usr/local/src/ffmpeg /usr/local/src/
cp
-r
/tmp/usr/local/src/ffmpeg /usr/local/src/
{% endif %}
{% endif %}
# Copy wheels first (separate from benchmarks to avoid unnecessary cache invalidation)
{% if target not in ("dev", "local-dev") %}
# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from
# source later in the shared dev stage after the workspace is bind-mounted.
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
# NIXL environment and native libraries
ENV
NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV
NIXL_LIB_DIR=$NIXL_PREFIX/lib64
ENV
NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins
# Copy UCX and NIXL native libraries to system directories
COPY
--from=wheel_builder /usr/local/ucx /usr/local/ucx
COPY
--chown=dynamo:0 --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
ENV
PATH=/usr/local/ucx/bin:$PATH
ENV
LD_LIBRARY_PATH=\
$NIXL_LIB_DIR:\
$NIXL_PLUGIN_DIR:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
$LD_LIBRARY_PATH
ENV
SGLANG_VERSION="${RUNTIME_IMAGE_TAG%%-*}"
{% if target not in ("dev", "local-dev") %}
# Install packages as root to ensure they go to system location (/usr/local/lib/python3.12/dist-packages)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
pip
install
--break-system-packages
\
pip
install
--break-system-packages
--no-deps
\
/opt/dynamo/wheelhouse/ai_dynamo_runtime
*
.whl
\
/opt/dynamo/wheelhouse/ai_dynamo_runtime
*
.whl
\
/opt/dynamo/wheelhouse/ai_dynamo
*
any.whl
\
/opt/dynamo/wheelhouse/ai_dynamo
*
any.whl
/opt/dynamo/wheelhouse/nixl/nixl
*
.whl
\
sglang
==
${
SGLANG_VERSION
}
{% else %}
# Dev/local-dev: skip dynamo wheel install (users build from source via cargo build + maturin develop).
# Install NIXL wheel (pre-built C++ binary, not buildable from source) and sglang.
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
pip
install
--break-system-packages
\
/opt/dynamo/wheelhouse/nixl/nixl
*
.whl
\
sglang
==
${
SGLANG_VERSION
}
{% endif %}
# Install gpu_memory_service wheel if enabled (all targets)
# Install gpu_memory_service wheel if enabled (all targets)
ARG
ENABLE_GPU_MEMORY_SERVICE
ARG
ENABLE_GPU_MEMORY_SERVICE
...
@@ -113,60 +58,9 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
...
@@ -113,60 +58,9 @@ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
GMS_WHEEL
=
$(
ls
/opt/dynamo/wheelhouse/gpu_memory_service
*
.whl 2>/dev/null |
head
-1
)
;
\
GMS_WHEEL
=
$(
ls
/opt/dynamo/wheelhouse/gpu_memory_service
*
.whl 2>/dev/null |
head
-1
)
;
\
if
[
-n
"
$GMS_WHEEL
"
]
;
then
pip
install
--no-cache-dir
--break-system-packages
"
$GMS_WHEEL
"
;
fi
;
\
if
[
-n
"
$GMS_WHEEL
"
]
;
then
pip
install
--no-cache-dir
--break-system-packages
"
$GMS_WHEEL
"
;
fi
;
\
fi
fi
{% if target not in ("dev", "local-dev") %}
# Copy benchmarks after wheel install so benchmarks changes don't invalidate the layer above
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY
--chmod=775 --chown=dynamo:0 benchmarks/ /workspace/benchmarks/
{% endif %}
# Install runtime dependencies (common + benchmarks) as root.
# Test and dev dependencies are NOT installed here — they go in the test and dev images.
RUN
--mount
=
type
=
bind
,source
=
container/deps/requirements.common.txt,target
=
/tmp/deps/requirements.common.txt
\
--mount
=
type
=
bind
,source
=
container/deps/requirements.benchmark.txt,target
=
/tmp/deps/requirements.benchmark.txt
\
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
pip
install
--break-system-packages
\
--requirement
/tmp/deps/requirements.common.txt
\
--requirement
/tmp/deps/requirements.benchmark.txt
\
sglang
==
${
SGLANG_VERSION
}
&&
\
#TODO: Temporary change until upstream sglang runtime image is updated
pip install --break-system-packages "urllib3>=2.6.3"
{% if target not in ("dev", "local-dev") %}
# Install benchmarks and fix permissions (dev/local-dev install from bind-mounted source if needed)
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
cd
/workspace/benchmarks
&&
\
pip
install
--break-system-packages
.
&&
\
chmod
-R
g+w /workspace/benchmarks
{% endif %}
{% endif %}
# Force-reinstall NVIDIA packages in a separate layer so requirements changes don't trigger re-download
# Copy tests, deploy and components for CI with correct ownership
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
export
PIP_CACHE_DIR
=
/root/.cache/pip
&&
\
CUDA_MAJOR
=
$(
nvcc
--version
| egrep
-o
'cuda_[0-9]+'
|
cut
-d_
-f2
)
&&
\
if
[
"
$CUDA_MAJOR
"
=
"12"
]
;
then
\
# Install NVIDIA packages that are needed for DeepEP to work properly
# This is done in the upstream runtime image too, but these packages are overridden in earlier commands
pip install --break-system-packages --force-reinstall --no-deps \
nvidia-nccl-cu12==2.28.3 \
nvidia-cudnn-cu12==9.16.0.29 \
nvidia-cutlass-dsl==4.3.5; \
elif [ "$CUDA_MAJOR" = "13" ]; then \
# CUDA 13: Install CuDNN for PyTorch 2.9.1 compatibility
pip install --break-system-packages --force-reinstall --no-deps \
nvidia-nccl-cu13==2.28.3 \
nvidia-cublas==13.1.0.3 \
nvidia-cutlass-dsl==4.3.1 \
nvidia-cudnn-cu13==9.16.0.29; \
fi
# Switch back to dynamo user after package installations
USER
dynamo
# Copy tests, deploy, and the sglang/common/mocker component subtrees for CI.
# Pattern: COPY --chmod=775 <path>; chmod g+w <path> done later as root because COPY --chmod only affects <path>/*, not <path>
COPY
--chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY
--chmod=775 --chown=dynamo:0 tests /workspace/tests
COPY
--chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY
--chmod=775 --chown=dynamo:0 examples /workspace/examples
COPY
--chmod=775 --chown=dynamo:0 deploy /workspace/deploy
COPY
--chmod=775 --chown=dynamo:0 deploy /workspace/deploy
...
@@ -174,6 +68,7 @@ COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/common /workspace/compon
...
@@ -174,6 +68,7 @@ COPY --chmod=775 --chown=dynamo:0 components/src/dynamo/common /workspace/compon
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/sglang /workspace/components/src/dynamo/sglang
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/sglang /workspace/components/src/dynamo/sglang
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/mocker /workspace/components/src/dynamo/mocker
COPY
--chmod=775 --chown=dynamo:0 components/src/dynamo/mocker /workspace/components/src/dynamo/mocker
COPY
--chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/
COPY
--chmod=775 --chown=dynamo:0 recipes/ /workspace/recipes/
COPY
--chmod=664 --chown=dynamo:0 ATTRIBUTION* LICENSE /workspace/
# Enable forceful shutdown of inflight requests
# Enable forceful shutdown of inflight requests
ENV
SGLANG_FORCE_SHUTDOWN=1
ENV
SGLANG_FORCE_SHUTDOWN=1
...
@@ -182,12 +77,6 @@ ENV SGLANG_FORCE_SHUTDOWN=1
...
@@ -182,12 +77,6 @@ ENV SGLANG_FORCE_SHUTDOWN=1
RUN
--mount
=
type
=
bind
,source
=
./container/launch_message/runtime.txt,target
=
/opt/dynamo/launch_message.txt
\
RUN
--mount
=
type
=
bind
,source
=
./container/launch_message/runtime.txt,target
=
/opt/dynamo/launch_message.txt
\
sed
'/^#\s/d'
/opt/dynamo/launch_message.txt
>
/opt/dynamo/.launch_screen
sed
'/^#\s/d'
/opt/dynamo/launch_message.txt
>
/opt/dynamo/.launch_screen
# Our scripting assumes /workspace is where dynamo is located
# In order to maintain the ability to have sglang and dynamo
# in the same workspace, symlink /workspace to /sgl-workspace/dynamo
USER
root
# Fix directory permissions: COPY --chmod only affects contents, not the directory itself
RUN
chmod
755 /opt/dynamo/.launch_screen
&&
\
RUN
chmod
755 /opt/dynamo/.launch_screen
&&
\
echo
'cat /opt/dynamo/.launch_screen'
>>
/etc/bash.bashrc
&&
\
echo
'cat /opt/dynamo/.launch_screen'
>>
/etc/bash.bashrc
&&
\
ln
-s
/workspace /sgl-workspace/dynamo
ln
-s
/workspace /sgl-workspace/dynamo
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment