Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
e041ccfc
"docs/vscode:/vscode.git/clone" did not exist on "13640e15d5a775848885901fc575aa330c0a442b"
Unverified
Commit
e041ccfc
authored
Apr 22, 2026
by
Alec
Committed by
GitHub
Apr 22, 2026
Browse files
ci: remove unused SGLang NIXL ref (#8472)
parent
36b4208e
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
88 additions
and
29 deletions
+88
-29
components/src/dynamo/frontend/sglang_prepost.py
components/src/dynamo/frontend/sglang_prepost.py
+17
-0
components/src/dynamo/frontend/tests/test_sglang_tool_calls.py
...nents/src/dynamo/frontend/tests/test_sglang_tool_calls.py
+44
-0
container/context.yaml
container/context.yaml
+3
-1
container/templates/args.Dockerfile
container/templates/args.Dockerfile
+2
-0
container/templates/dev.Dockerfile
container/templates/dev.Dockerfile
+9
-26
container/templates/sglang_runtime.Dockerfile
container/templates/sglang_runtime.Dockerfile
+4
-2
container/templates/wheel_builder.Dockerfile
container/templates/wheel_builder.Dockerfile
+9
-0
No files found.
components/src/dynamo/frontend/sglang_prepost.py
View file @
e041ccfc
...
@@ -675,6 +675,23 @@ class SglangStreamingPostProcessor:
...
@@ -675,6 +675,23 @@ class SglangStreamingPostProcessor:
if
tc
.
parameters
:
if
tc
.
parameters
:
self
.
_tool_call_args
[
seq_idx
]
=
[
tc
.
parameters
]
self
.
_tool_call_args
[
seq_idx
]
=
[
tc
.
parameters
]
# Do not emit partial tool calls. A streaming parser can detect a
# tool name before the model finishes malformed JSON; if the
# finish-time re-parse cannot recover valid arguments, treat the
# response as plain text instead of surfacing name + empty args.
dropped_names
=
[]
for
idx
in
list
(
self
.
_tool_call_names
):
if
not
""
.
join
(
self
.
_tool_call_args
.
get
(
idx
,
[])):
dropped_names
.
append
(
self
.
_tool_call_names
[
idx
])
del
self
.
_tool_call_names
[
idx
]
self
.
_tool_call_ids
.
pop
(
idx
,
None
)
self
.
_tool_call_args
.
pop
(
idx
,
None
)
if
dropped_names
:
logger
.
warning
(
"Dropping incomplete SGLang tool calls with no valid arguments: %s"
,
dropped_names
,
)
if
finish_reason
and
self
.
_tool_call_names
:
if
finish_reason
and
self
.
_tool_call_names
:
tool_calls_out
:
list
[
dict
[
str
,
Any
]]
=
[]
tool_calls_out
:
list
[
dict
[
str
,
Any
]]
=
[]
for
idx
in
sorted
(
self
.
_tool_call_names
):
for
idx
in
sorted
(
self
.
_tool_call_names
):
...
...
components/src/dynamo/frontend/tests/test_sglang_tool_calls.py
View file @
e041ccfc
...
@@ -467,6 +467,50 @@ class TestSingleChunkFallback:
...
@@ -467,6 +467,50 @@ class TestSingleChunkFallback:
assert
choice
[
"finish_reason"
]
==
"tool_calls"
assert
choice
[
"finish_reason"
]
==
"tool_calls"
class
TestMalformedToolCalls
:
def
test_incomplete_arguments_are_not_emitted
(
self
):
class
DummyTokenizer
:
def
decode
(
self
,
token_ids
,
skip_special_tokens
=
True
):
return
""
.
join
(
chr
(
x
)
for
x
in
token_ids
)
class
DummyToolCall
:
def
__init__
(
self
,
tool_index
,
name
,
parameters
):
self
.
tool_index
=
tool_index
self
.
name
=
name
self
.
parameters
=
parameters
class
DummyParser
:
def
parse_stream_chunk
(
self
,
text
):
return
""
,
[
DummyToolCall
(
0
,
"get_weather"
,
'{"city": "Paris"'
)]
def
has_tool_call
(
self
,
text
):
return
"<tool_call>"
in
text
def
parse_non_stream
(
self
,
text
):
return
""
,
[]
post
=
SglangStreamingPostProcessor
(
tokenizer
=
DummyTokenizer
(),
tool_call_parser
=
DummyParser
(),
reasoning_parser
=
None
,
)
malformed
=
(
'<tool_call>
\n
{"name": "get_weather", '
'"arguments": {"city": "Paris"}
\n
</tool_call>'
)
choice
=
post
.
process_output
(
{
"token_ids"
:
[
ord
(
c
)
for
c
in
malformed
],
"finish_reason"
:
"stop"
,
}
)
assert
choice
is
not
None
assert
choice
[
"finish_reason"
]
==
"stop"
assert
choice
.
get
(
"delta"
,
{}).
get
(
"tool_calls"
,
[])
==
[]
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# JsonArrayParser path (tool_choice="required" / named function)
# JsonArrayParser path (tool_choice="required" / named function)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
...
...
container/context.yaml
View file @
e041ccfc
...
@@ -85,7 +85,9 @@ sglang:
...
@@ -85,7 +85,9 @@ sglang:
runtime_image
:
lmsysorg/sglang
runtime_image
:
lmsysorg/sglang
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
base_image_tag
:
25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag
:
v0.5.10.post1-cu130-runtime
runtime_image_tag
:
v0.5.10.post1-cu130-runtime
nixl_ref
:
v1.0.1
# SGLang uses the NIXL stack from the upstream lmsysorg/sglang runtime image.
# Do not add nixl_ref here: Dynamo does not build or install its NIXL wheel
# for SGLang, and SGLang does not use Dynamo KVBM/block-manager at runtime.
enable_media_ffmpeg
:
"
true"
enable_media_ffmpeg
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_gpu_memory_service
:
"
true"
enable_kvbm
:
"
false"
enable_kvbm
:
"
false"
...
...
container/templates/args.Dockerfile
View file @
e041ccfc
...
@@ -66,7 +66,9 @@ ARG SCCACHE_REGION=""
...
@@ -66,7 +66,9 @@ ARG SCCACHE_REGION=""
# NIXL configuration
# NIXL configuration
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG
NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
{% if "nixl_ref" in context[framework] -%}
ARG
NIXL_REF={{ context[framework].nixl_ref }}
ARG
NIXL_REF={{ context[framework].nixl_ref }}
{% endif -%}
{% if device == "cuda" %}
{% if device == "cuda" %}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
ARG
NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
...
...
container/templates/dev.Dockerfile
View file @
e041ccfc
...
@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \
...
@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \
fi
;
\
fi
;
\
fi
fi
# Copy UCX and NIXL libraries for dev stage compilation.
# The upstream SGLang runtime image doesn't include NIXL, but cargo build needs to link against
# -lnixl, -lnixl_build, and -lnixl_common. Runtime stage doesn't need this since it uses pre-built
# wheels, but dev stage needs it for maturin develop and cargo build from source.
# - SGLang: Copy NIXL/UCX/libfabric/gdrcopy binaries from wheel_builder (not in upstream lmsysorg/sglang runtime).
# - vllm/trtllm/none: NIXL/UCX are already present in runtime (no-op).
ARG
TARGETARCH
RUN
--mount
=
from
=
wheel_builder,target
=
/wheel_builder
\
if
[
"
${
FRAMEWORK
}
"
=
"sglang"
]
;
then
\
if
[
-d
/wheel_builder/usr/local/ucx
]
&&
[
-d
/wheel_builder/opt/nvidia/nvda_nixl
]
;
then
\
mkdir
-p
/opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d
;
\
cp
-r
/wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/
;
\
cp
-r
/wheel_builder/usr/local/ucx /usr/local/
;
\
cp
-r
/wheel_builder/usr/local/libfabric /usr/local/
;
\
cp
/wheel_builder/usr/include/gdrapi.h /usr/include/
;
\
cp
/wheel_builder/usr/lib64/libgdrapi.so
*
/usr/lib64/
;
\
echo
"/usr/lib64"
>>
/etc/ld.so.conf.d/gdrcopy.conf
;
\
fi
;
\
fi
{% if device == "xpu" %}
{% if device == "xpu" %}
ENV
NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
ENV
NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/intel/intel_nixl
NIXL_PREFIX=/opt/intel/intel_nixl
{% else %}
{% elif framework != "sglang" %}
# NIXL is installed under lib64 (manylinux/AlmaLinux convention used by the wheel_builder).
# Non-SGLang runtimes use the Dynamo-built NIXL install from wheel_builder.
# All frameworks reference NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64.
# Reset the same values already set in runtime (no harm).
# For vllm/trtllm/none: This resets the same values already set in runtime (no harm).
# For sglang: This sets them for the first time (required).
ENV
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
ENV
NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
{% endif %}
{% if device != "xpu" %}
# Set universal CUDA development environment variables (all frameworks)
# Set universal CUDA development environment variables (all frameworks)
# vLLM: Dockerfile.vllm line 533, 597
# vLLM: Dockerfile.vllm line 533, 597
# TRT-LLM: Dockerfile.trtllm lines 600-606
# TRT-LLM: Dockerfile.trtllm lines 600-606
...
@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \
...
@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %}
{% endif %}
{% if framework != "sglang" %}
# Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
# Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
ARG
PYTHON_VERSION
ENV
LD_LIBRARY_PATH=\
ENV
LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\
${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\
${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\
/usr/local/ucx/lib/ucx:\
${LD_LIBRARY_PATH}
${LD_LIBRARY_PATH}
{% else %}
# SGLang dev/local-dev inherit the upstream SGLang/NIXL runtime stack.
{% endif %}
# Copy shell profile script for framework-specific environment variables
# Copy shell profile script for framework-specific environment variables
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists
...
...
container/templates/sglang_runtime.Dockerfile
View file @
e041ccfc
...
@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
...
@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
{% endif %}
{% endif %}
{% if target not in ("dev", "local-dev") %}
{% if target not in ("dev", "local-dev") %}
# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from
# Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
# source later in the shared dev stage after the workspace is bind-mounted.
# packages come from the upstream lmsysorg/sglang runtime image; --no-deps keeps
# pip from replacing that stack. Dev/local-dev build from source later in the
# shared dev stage after the workspace is bind-mounted.
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
COPY
--chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
RUN
--mount
=
type
=
cache,target
=
/root/.cache/pip,sharing
=
locked
\
...
...
container/templates/wheel_builder.Dockerfile
View file @
e041ccfc
...
@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
...
@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
##################################
##################################
##### wheel_builder
##############
##### wheel_builder
##############
##################################
##################################
{% if "nixl_ref" in context[framework] %}
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
# Runtime templates COPY from this stage.
# Runtime templates COPY from this stage.
...
@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
...
@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
# Consolidate all wheels from the runtime wheel builder stage
# Consolidate all wheels from the runtime wheel builder stage
COPY
--from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/
COPY
--from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/
{% else %}
# SGLang uses NIXL from the upstream lmsysorg/sglang runtime image and does not
# build Dynamo KVBM. Keep this alias so downstream stages can still COPY Dynamo
# wheels and build tools from a common wheel_builder stage name.
# SGLang dev/source builds may link nixl-sys against stubs when native NIXL is
# absent; block-manager/KVBM runtime work should use vllm/trtllm/none images.
FROM
runtime_wheel_builder AS wheel_builder
{% endif %}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment