ci: remove unused SGLang NIXL ref (#8472)

e041ccfc · Alec · GitHub · 36b4208e · e041ccfc · e041ccfc
Unverified commit e041ccfc, authored Apr 22, 2026 by Alec; committed by GitHub Apr 22, 2026
7 changed files
--- a/components/src/dynamo/frontend/sglang_prepost.py
+++ b/components/src/dynamo/frontend/sglang_prepost.py
@@ -675,6 +675,23 @@ class SglangStreamingPostProcessor:
                        if tc.parameters:
                            self._tool_call_args[seq_idx] = [tc.parameters]
+            # Do not emit partial tool calls. A streaming parser can detect a
+            # tool name before the model finishes malformed JSON; if the
+            # finish-time re-parse cannot recover valid arguments, treat the
+            # response as plain text instead of surfacing name + empty args.
+            dropped_names = []
+            for idx in list(self._tool_call_names):
+                if not "".join(self._tool_call_args.get(idx, [])):
+                    dropped_names.append(self._tool_call_names[idx])
+                    del self._tool_call_names[idx]
+                    self._tool_call_ids.pop(idx, None)
+                    self._tool_call_args.pop(idx, None)
+            if dropped_names:
+                logger.warning(
+                    "Dropping incomplete SGLang tool calls with no valid arguments: %s",
+                    dropped_names,
+                )
        if finish_reason and self._tool_call_names:
            tool_calls_out: list[dict[str, Any]] = []
            for idx in sorted(self._tool_call_names):

--- a/components/src/dynamo/frontend/tests/test_sglang_tool_calls.py
+++ b/components/src/dynamo/frontend/tests/test_sglang_tool_calls.py
@@ -467,6 +467,50 @@ class TestSingleChunkFallback:
        assert choice["finish_reason"] == "tool_calls"
+class TestMalformedToolCalls:  # tool calls whose arguments never become valid must not be emitted
+    def test_incomplete_arguments_are_not_emitted(self):  # single finishing chunk carrying an unterminated tool-call payload
+        class DummyTokenizer:  # stub tokenizer: each token id is the code point of one character
+            def decode(self, token_ids, skip_special_tokens=True):  # skip_special_tokens is accepted but ignored
+                return "".join(chr(x) for x in token_ids)
+        class DummyToolCall:  # plain record exposing tool_index, name and parameters
+            def __init__(self, tool_index, name, parameters):
+                self.tool_index = tool_index
+                self.name = name
+                self.parameters = parameters
+        class DummyParser:  # stub parser: the streaming parse reports a call, the non-stream parse reports none
+            def parse_stream_chunk(self, text):  # ignores text and always returns the same single call
+                return "", [DummyToolCall(0, "get_weather", '{"city": "Paris"')]  # parameters string lacks its closing brace
+            def has_tool_call(self, text):
+                return "<tool_call>" in text
+            def parse_non_stream(self, text):  # NOTE(review): presumably the finish-time re-parse path; confirm against process_output
+                return "", []  # empty text and no tool calls
+        post = SglangStreamingPostProcessor(
+            tokenizer=DummyTokenizer(),
+            tool_call_parser=DummyParser(),
+            reasoning_parser=None,
+        )
+        malformed = (  # the arguments object closes, but the outer JSON object never does
+            '<tool_call>\n{"name": "get_weather", '
+            '"arguments": {"city": "Paris"}\n</tool_call>'
+        )
+        choice = post.process_output(
+            {
+                "token_ids": [ord(c) for c in malformed],  # one id per character, the inverse of DummyTokenizer.decode
+                "finish_reason": "stop",
+            }
+        )
+        assert choice is not None
+        assert choice["finish_reason"] == "stop"  # finish reason is reported as stop
+        assert choice.get("delta", {}).get("tool_calls", []) == []  # tool_calls is either absent or empty in the delta
 # ---------------------------------------------------------------------------
 # JsonArrayParser path (tool_choice="required" / named function)
 # ---------------------------------------------------------------------------

--- a/container/context.yaml
+++ b/container/context.yaml
@@ -85,7 +85,9 @@ sglang:
    runtime_image: lmsysorg/sglang
    base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
    runtime_image_tag: v0.5.10.post1-cu130-runtime
-  nixl_ref: v1.0.1
+  # SGLang uses the NIXL stack from the upstream lmsysorg/sglang runtime image.
+  # Do not add nixl_ref here: Dynamo does not build or install its NIXL wheel
+  # for SGLang, and SGLang does not use Dynamo KVBM/block-manager at runtime.
  enable_media_ffmpeg: "true"
  enable_gpu_memory_service: "true"
  enable_kvbm: "false"

--- a/container/templates/args.Dockerfile
+++ b/container/templates/args.Dockerfile
@@ -66,7 +66,9 @@ ARG SCCACHE_REGION=""
 # NIXL configuration
 ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
+{% if "nixl_ref" in context[framework] -%}
 ARG NIXL_REF={{ context[framework].nixl_ref }}
+{% endif -%}
 {% if device == "cuda" %}
 ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
 ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}

--- a/container/templates/dev.Dockerfile
+++ b/container/templates/dev.Dockerfile
@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \
        fi; \
    fi
-# Copy UCX and NIXL libraries for dev stage compilation.
-# The upstream SGLang runtime image doesn't include NIXL, but cargo build needs to link against
-# -lnixl, -lnixl_build, and -lnixl_common. Runtime stage doesn't need this since it uses pre-built
-# wheels, but dev stage needs it for maturin develop and cargo build from source.
-# - SGLang: Copy NIXL/UCX/libfabric/gdrcopy binaries from wheel_builder (not in upstream lmsysorg/sglang runtime).
-# - vllm/trtllm/none: NIXL/UCX are already present in runtime (no-op).
-ARG TARGETARCH
-RUN --mount=from=wheel_builder,target=/wheel_builder \
-    if [ "${FRAMEWORK}" = "sglang" ]; then \
-        if [ -d /wheel_builder/usr/local/ucx ] && [ -d /wheel_builder/opt/nvidia/nvda_nixl ]; then \
-            mkdir -p /opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d; \
-            cp -r /wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/; \
-            cp -r /wheel_builder/usr/local/ucx /usr/local/; \
-            cp -r /wheel_builder/usr/local/libfabric /usr/local/; \
-            cp /wheel_builder/usr/include/gdrapi.h /usr/include/; \
-            cp /wheel_builder/usr/lib64/libgdrapi.so* /usr/lib64/; \
-            echo "/usr/lib64" >> /etc/ld.so.conf.d/gdrcopy.conf; \
-        fi; \
-    fi
 {% if device == "xpu" %}
 ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu  \
    NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
    NIXL_PREFIX=/opt/intel/intel_nixl
-{% else %}
+{% elif framework != "sglang" %}
-# NIXL is installed under lib64 (manylinux/AlmaLinux convention used by the wheel_builder).
+# Non-SGLang runtimes use the Dynamo-built NIXL install from wheel_builder.
-# All frameworks reference NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64.
+# This re-sets the same values already set in the runtime stage (harmless).
-# For vllm/trtllm/none: This resets the same values already set in runtime (no harm).
-# For sglang: This sets them for the first time (required).
 ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
    NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
    NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
+{% endif %}
+{% if device != "xpu" %}
 # Set universal CUDA development environment variables (all frameworks)
 # vLLM: Dockerfile.vllm line 533, 597
 # TRT-LLM: Dockerfile.trtllm lines 600-606
@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \
    NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
 {% endif %}
+{% if framework != "sglang" %}
 # Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
 # Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
-ARG PYTHON_VERSION
 ENV LD_LIBRARY_PATH=\
 ${NIXL_LIB_DIR}:\
 ${NIXL_PLUGIN_DIR}:\
 /usr/local/ucx/lib:\
 /usr/local/ucx/lib/ucx:\
 ${LD_LIBRARY_PATH}
+{% else %}
+# SGLang dev/local-dev inherit the upstream SGLang/NIXL runtime stack.
+{% endif %}
 # Copy shell profile script for framework-specific environment variables
 # This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists

--- a/container/templates/sglang_runtime.Dockerfile
+++ b/container/templates/sglang_runtime.Dockerfile
@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
 {% endif %}
 {% if target not in ("dev", "local-dev") %}
-# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from
+# Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
-# source later in the shared dev stage after the workspace is bind-mounted.
+# packages come from the upstream lmsysorg/sglang runtime image; --no-deps keeps
+# pip from replacing that stack. Dev/local-dev build from source later in the
+# shared dev stage after the workspace is bind-mounted.
 COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
 RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \

--- a/container/templates/wheel_builder.Dockerfile
+++ b/container/templates/wheel_builder.Dockerfile
@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 ##################################
 ##### wheel_builder ##############
 ##################################
+{% if "nixl_ref" in context[framework] %}
 # Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
 # Runtime templates COPY from this stage.
@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
 # Consolidate all wheels from the runtime wheel builder stage
 COPY --from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/
+{% else %}
+# SGLang uses NIXL from the upstream lmsysorg/sglang runtime image and does not
+# build Dynamo KVBM. Keep this alias so downstream stages can still COPY Dynamo
+# wheels and build tools from a common wheel_builder stage name.
+# SGLang dev/source builds may link nixl-sys against stubs when native NIXL is
+# absent; block-manager/KVBM runtime work should use vllm/trtllm/none images.
+FROM runtime_wheel_builder AS wheel_builder
+{% endif %}