Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
e041ccfc
Unverified
Commit
e041ccfc
authored
Apr 22, 2026
by
Alec
Committed by
GitHub
Apr 22, 2026
Browse files
ci: remove unused SGLang NIXL ref (#8472)
parent
36b4208e
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
88 additions
and
29 deletions
+88
-29
components/src/dynamo/frontend/sglang_prepost.py
components/src/dynamo/frontend/sglang_prepost.py
+17
-0
components/src/dynamo/frontend/tests/test_sglang_tool_calls.py
...nents/src/dynamo/frontend/tests/test_sglang_tool_calls.py
+44
-0
container/context.yaml
container/context.yaml
+3
-1
container/templates/args.Dockerfile
container/templates/args.Dockerfile
+2
-0
container/templates/dev.Dockerfile
container/templates/dev.Dockerfile
+9
-26
container/templates/sglang_runtime.Dockerfile
container/templates/sglang_runtime.Dockerfile
+4
-2
container/templates/wheel_builder.Dockerfile
container/templates/wheel_builder.Dockerfile
+9
-0
No files found.
components/src/dynamo/frontend/sglang_prepost.py
View file @
e041ccfc
...
...
@@ -675,6 +675,23 @@ class SglangStreamingPostProcessor:
if
tc
.
parameters
:
self
.
_tool_call_args
[
seq_idx
]
=
[
tc
.
parameters
]
# Do not emit partial tool calls. A streaming parser can detect a
# tool name before the model has finished writing the arguments JSON,
# which may turn out to be malformed. If the finish-time re-parse cannot
# recover valid arguments, treat the response as plain text instead of
# surfacing a tool name with empty args.
dropped_names
=
[]
for
idx
in
list
(
self
.
_tool_call_names
):
if
not
""
.
join
(
self
.
_tool_call_args
.
get
(
idx
,
[])):
dropped_names
.
append
(
self
.
_tool_call_names
[
idx
])
del
self
.
_tool_call_names
[
idx
]
self
.
_tool_call_ids
.
pop
(
idx
,
None
)
self
.
_tool_call_args
.
pop
(
idx
,
None
)
if
dropped_names
:
logger
.
warning
(
"Dropping incomplete SGLang tool calls with no valid arguments: %s"
,
dropped_names
,
)
if
finish_reason
and
self
.
_tool_call_names
:
tool_calls_out
:
list
[
dict
[
str
,
Any
]]
=
[]
for
idx
in
sorted
(
self
.
_tool_call_names
):
...
...
components/src/dynamo/frontend/tests/test_sglang_tool_calls.py
View file @
e041ccfc
...
...
@@ -467,6 +467,50 @@ class TestSingleChunkFallback:
assert
choice
[
"finish_reason"
]
==
"tool_calls"
class
TestMalformedToolCalls
:
def
test_incomplete_arguments_are_not_emitted
(
self
):
class
DummyTokenizer
:
def
decode
(
self
,
token_ids
,
skip_special_tokens
=
True
):
return
""
.
join
(
chr
(
x
)
for
x
in
token_ids
)
class
DummyToolCall
:
def
__init__
(
self
,
tool_index
,
name
,
parameters
):
self
.
tool_index
=
tool_index
self
.
name
=
name
self
.
parameters
=
parameters
class
DummyParser
:
def
parse_stream_chunk
(
self
,
text
):
return
""
,
[
DummyToolCall
(
0
,
"get_weather"
,
'{"city": "Paris"'
)]
def
has_tool_call
(
self
,
text
):
return
"<tool_call>"
in
text
def
parse_non_stream
(
self
,
text
):
return
""
,
[]
post
=
SglangStreamingPostProcessor
(
tokenizer
=
DummyTokenizer
(),
tool_call_parser
=
DummyParser
(),
reasoning_parser
=
None
,
)
malformed
=
(
'<tool_call>
\n
{"name": "get_weather", '
'"arguments": {"city": "Paris"}
\n
</tool_call>'
)
choice
=
post
.
process_output
(
{
"token_ids"
:
[
ord
(
c
)
for
c
in
malformed
],
"finish_reason"
:
"stop"
,
}
)
assert
choice
is
not
None
assert
choice
[
"finish_reason"
]
==
"stop"
assert
choice
.
get
(
"delta"
,
{}).
get
(
"tool_calls"
,
[])
==
[]
# ---------------------------------------------------------------------------
# JsonArrayParser path (tool_choice="required" / named function)
# ---------------------------------------------------------------------------
...
...
container/context.yaml
View file @
e041ccfc
...
...
@@ -85,7 +85,9 @@ sglang:
runtime_image
:
lmsysorg/sglang
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag
:
v0.5.10.post1-cu130-runtime
nixl_ref
:
v1.0.1
# SGLang uses the NIXL stack from the upstream lmsysorg/sglang runtime image.
# Do not add nixl_ref here: Dynamo does not build or install its NIXL wheel
# for SGLang, and SGLang does not use Dynamo KVBM/block-manager at runtime.
enable_media_ffmpeg
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_kvbm
:
"
false"
...
...
container/templates/args.Dockerfile
View file @
e041ccfc
...
...
@@ -66,7 +66,9 @@ ARG SCCACHE_REGION=""
# NIXL configuration
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
{% if "nixl_ref" in context[framework] -%}
ARG
NIXL_REF={{ context[framework].nixl_ref }}
{% endif -%}
{% if device == "cuda" %}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
...
...
container/templates/dev.Dockerfile
View file @
e041ccfc
...
...
@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \
fi
;
\
fi
# Copy UCX and NIXL libraries for dev stage compilation.
# The upstream SGLang runtime image doesn't include NIXL, but cargo build needs to link against
# -lnixl, -lnixl_build, and -lnixl_common. Runtime stage doesn't need this since it uses pre-built
# wheels, but dev stage needs it for maturin develop and cargo build from source.
# - SGLang: Copy NIXL/UCX/libfabric/gdrcopy binaries from wheel_builder (not in upstream lmsysorg/sglang runtime).
# - vllm/trtllm/none: NIXL/UCX are already present in runtime (no-op).
ARG
TARGETARCH
RUN
--mount
=
from
=
wheel_builder,target
=
/wheel_builder
\
if
[
"
${
FRAMEWORK
}
"
=
"sglang"
]
;
then
\
if
[
-d
/wheel_builder/usr/local/ucx
]
&&
[
-d
/wheel_builder/opt/nvidia/nvda_nixl
]
;
then
\
mkdir
-p
/opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d
;
\
cp
-r
/wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/
;
\
cp
-r
/wheel_builder/usr/local/ucx /usr/local/
;
\
cp
-r
/wheel_builder/usr/local/libfabric /usr/local/
;
\
cp
/wheel_builder/usr/include/gdrapi.h /usr/include/
;
\
cp
/wheel_builder/usr/lib64/libgdrapi.so
*
/usr/lib64/
;
\
echo
"/usr/lib64"
>>
/etc/ld.so.conf.d/gdrcopy.conf
;
\
fi
;
\
fi
{% if device == "xpu" %}
ENV
NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/intel/intel_nixl
{% else %}
# NIXL is installed under lib64 (manylinux/AlmaLinux convention used by the wheel_builder).
# All frameworks reference NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64.
# For vllm/trtllm/none: This resets the same values already set in runtime (no harm).
# For sglang: This sets them for the first time (required).
{% elif framework != "sglang" %}
# Non-SGLang runtimes use the Dynamo-built NIXL install from wheel_builder.
# Reset the same values already set in runtime (no harm).
ENV
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
{% endif %}
{% if device != "xpu" %}
# Set universal CUDA development environment variables (all frameworks)
# vLLM: Dockerfile.vllm line 533, 597
# TRT-LLM: Dockerfile.trtllm lines 600-606
...
...
@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %}
{% if framework != "sglang" %}
# Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
ARG
PYTHON_VERSION
ENV
LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
${LD_LIBRARY_PATH}
{% else %}
# SGLang dev/local-dev inherit the upstream SGLang/NIXL runtime stack.
{% endif %}
# Copy shell profile script for framework-specific environment variables
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists
...
...
container/templates/sglang_runtime.Dockerfile
View file @
e041ccfc
...
...
@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
{% endif %}
{% if target not in ("dev", "local-dev") %}
# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from
# source later in the shared dev stage after the workspace is bind-mounted.
# Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
# packages come from the upstream lmsysorg/sglang runtime image; --no-deps keeps
# pip from replacing that stack. Dev/local-dev build from source later in the
# shared dev stage after the workspace is bind-mounted.
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
...
...
container/templates/wheel_builder.Dockerfile
View file @
e041ccfc
...
...
@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
##################################
##### wheel_builder
##############
##################################
{% if "nixl_ref" in context[framework] %}
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
# Runtime templates COPY from this stage.
...
...
@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
# Consolidate all wheels from the runtime wheel builder stage
COPY
--from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/
{% else %}
# SGLang uses NIXL from the upstream lmsysorg/sglang runtime image and does not
# build Dynamo KVBM. Keep this alias so downstream stages can still COPY Dynamo
# wheels and build tools from a common wheel_builder stage name.
# SGLang dev/source builds may link nixl-sys against stubs when native NIXL is
# absent; block-manager/KVBM runtime work should use vllm/trtllm/none images.
FROM
runtime_wheel_builder AS wheel_builder
{% endif %}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment