Unverified Commit e041ccfc authored by Alec's avatar Alec Committed by GitHub
Browse files

ci: remove unused SGLang NIXL ref (#8472)

parent 36b4208e
...@@ -675,6 +675,23 @@ class SglangStreamingPostProcessor: ...@@ -675,6 +675,23 @@ class SglangStreamingPostProcessor:
if tc.parameters: if tc.parameters:
self._tool_call_args[seq_idx] = [tc.parameters] self._tool_call_args[seq_idx] = [tc.parameters]
# Do not emit partial tool calls. A streaming parser can detect a
# tool name before the model finishes malformed JSON; if the
# finish-time re-parse cannot recover valid arguments, treat the
# response as plain text instead of surfacing name + empty args.
dropped_names = []
for idx in list(self._tool_call_names):
if not "".join(self._tool_call_args.get(idx, [])):
dropped_names.append(self._tool_call_names[idx])
del self._tool_call_names[idx]
self._tool_call_ids.pop(idx, None)
self._tool_call_args.pop(idx, None)
if dropped_names:
logger.warning(
"Dropping incomplete SGLang tool calls with no valid arguments: %s",
dropped_names,
)
if finish_reason and self._tool_call_names: if finish_reason and self._tool_call_names:
tool_calls_out: list[dict[str, Any]] = [] tool_calls_out: list[dict[str, Any]] = []
for idx in sorted(self._tool_call_names): for idx in sorted(self._tool_call_names):
......
...@@ -467,6 +467,50 @@ class TestSingleChunkFallback: ...@@ -467,6 +467,50 @@ class TestSingleChunkFallback:
assert choice["finish_reason"] == "tool_calls" assert choice["finish_reason"] == "tool_calls"
class TestMalformedToolCalls:
    """Checks how the post-processor treats tool calls with unusable arguments."""

    def test_incomplete_arguments_are_not_emitted(self):
        """A tool call whose arguments cannot be recovered is not emitted.

        The stub parser reports a ``get_weather`` call with truncated JSON
        while streaming and finds no tool calls when asked for a non-stream
        parse. The resulting choice must keep ``finish_reason == "stop"``
        and must not carry any ``tool_calls`` in its delta.
        """

        class _CharTokenizer:
            # Treats every token id as a Unicode code point.
            def decode(self, token_ids, skip_special_tokens=True):
                return "".join(map(chr, token_ids))

        class _PartialToolCall:
            # Bare record exposing the three attributes set below.
            def __init__(self, tool_index, name, parameters):
                self.tool_index = tool_index
                self.name = name
                self.parameters = parameters

        class _TruncatingParser:
            def parse_stream_chunk(self, text):
                # The arguments payload is deliberately missing its closing
                # brace.
                partial = _PartialToolCall(0, "get_weather", '{"city": "Paris"')
                return "", [partial]

            def has_tool_call(self, text):
                return "<tool_call>" in text

            def parse_non_stream(self, text):
                # The non-stream parse yields no tool calls at all.
                return "", []

        processor = SglangStreamingPostProcessor(
            tokenizer=_CharTokenizer(),
            tool_call_parser=_TruncatingParser(),
            reasoning_parser=None,
        )

        # The outer JSON object is never closed before </tool_call>.
        malformed = (
            '<tool_call>\n{"name": "get_weather", '
            '"arguments": {"city": "Paris"}\n</tool_call>'
        )
        engine_output = {
            "token_ids": [ord(ch) for ch in malformed],
            "finish_reason": "stop",
        }

        choice = processor.process_output(engine_output)

        assert choice is not None
        assert choice["finish_reason"] == "stop"
        emitted_calls = choice.get("delta", {}).get("tool_calls", [])
        assert emitted_calls == []
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# JsonArrayParser path (tool_choice="required" / named function) # JsonArrayParser path (tool_choice="required" / named function)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
......
...@@ -85,7 +85,9 @@ sglang: ...@@ -85,7 +85,9 @@ sglang:
runtime_image: lmsysorg/sglang runtime_image: lmsysorg/sglang
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04 base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: v0.5.10.post1-cu130-runtime runtime_image_tag: v0.5.10.post1-cu130-runtime
nixl_ref: v1.0.1 # SGLang uses the NIXL stack from the upstream lmsysorg/sglang runtime image.
# Do not add nixl_ref here: Dynamo does not build or install its NIXL wheel
# for SGLang, and SGLang does not use Dynamo KVBM/block-manager at runtime.
enable_media_ffmpeg: "true" enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true" enable_gpu_memory_service: "true"
enable_kvbm: "false" enable_kvbm: "false"
......
...@@ -66,7 +66,9 @@ ARG SCCACHE_REGION="" ...@@ -66,7 +66,9 @@ ARG SCCACHE_REGION=""
# NIXL configuration # NIXL configuration
ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }} ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
{% if "nixl_ref" in context[framework] -%}
ARG NIXL_REF={{ context[framework].nixl_ref }} ARG NIXL_REF={{ context[framework].nixl_ref }}
{% endif -%}
{% if device == "cuda" %} {% if device == "cuda" %}
ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }} ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }} ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
......
...@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \ ...@@ -187,39 +187,19 @@ RUN if [ ! -e /usr/bin/python3 ]; then \
fi; \ fi; \
fi fi
# Copy UCX and NIXL libraries for dev stage compilation.
# The upstream SGLang runtime image doesn't include NIXL, but cargo build needs to link against
# -lnixl, -lnixl_build, and -lnixl_common. Runtime stage doesn't need this since it uses pre-built
# wheels, but dev stage needs it for maturin develop and cargo build from source.
# - SGLang: Copy NIXL/UCX/libfabric/gdrcopy binaries from wheel_builder (not in upstream lmsysorg/sglang runtime).
# - vllm/trtllm/none: NIXL/UCX are already present in runtime (no-op).
ARG TARGETARCH
RUN --mount=from=wheel_builder,target=/wheel_builder \
if [ "${FRAMEWORK}" = "sglang" ]; then \
if [ -d /wheel_builder/usr/local/ucx ] && [ -d /wheel_builder/opt/nvidia/nvda_nixl ]; then \
mkdir -p /opt/nvidia /usr/include /usr/lib64 /etc/ld.so.conf.d; \
cp -r /wheel_builder/opt/nvidia/nvda_nixl /opt/nvidia/; \
cp -r /wheel_builder/usr/local/ucx /usr/local/; \
cp -r /wheel_builder/usr/local/libfabric /usr/local/; \
cp /wheel_builder/usr/include/gdrapi.h /usr/include/; \
cp /wheel_builder/usr/lib64/libgdrapi.so* /usr/lib64/; \
echo "/usr/lib64" >> /etc/ld.so.conf.d/gdrcopy.conf; \
fi; \
fi
{% if device == "xpu" %} {% if device == "xpu" %}
ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \ ENV NIXL_LIB_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu \
NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \ NIXL_PLUGIN_DIR=/opt/intel/intel_nixl/lib/x86_64-linux-gnu/plugins \
NIXL_PREFIX=/opt/intel/intel_nixl NIXL_PREFIX=/opt/intel/intel_nixl
{% else %} {% elif framework != "sglang" %}
# NIXL is installed under lib64 (manylinux/AlmaLinux convention used by the wheel_builder). # Non-SGLang runtimes use the Dynamo-built NIXL install from wheel_builder.
# All frameworks reference NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64. # Reset the same values already set in runtime (no harm).
# For vllm/trtllm/none: This resets the same values already set in runtime (no harm).
# For sglang: This sets them for the first time (required).
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \ ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \ NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins
{% endif %}
{% if device != "xpu" %}
# Set universal CUDA development environment variables (all frameworks) # Set universal CUDA development environment variables (all frameworks)
# vLLM: Dockerfile.vllm line 533, 597 # vLLM: Dockerfile.vllm line 533, 597
# TRT-LLM: Dockerfile.trtllm lines 600-606 # TRT-LLM: Dockerfile.trtllm lines 600-606
...@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \ ...@@ -235,15 +215,18 @@ ENV CUDA_HOME=/usr/local/cuda \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
{% endif %} {% endif %}
{% if framework != "sglang" %}
# Base LD_LIBRARY_PATH with universal paths (all frameworks have these) # Base LD_LIBRARY_PATH with universal paths (all frameworks have these)
# Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh # Framework-specific paths are conditionally added in /etc/profile.d/50-framework-paths.sh
ARG PYTHON_VERSION
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
${NIXL_LIB_DIR}:\ ${NIXL_LIB_DIR}:\
${NIXL_PLUGIN_DIR}:\ ${NIXL_PLUGIN_DIR}:\
/usr/local/ucx/lib:\ /usr/local/ucx/lib:\
/usr/local/ucx/lib/ucx:\ /usr/local/ucx/lib/ucx:\
${LD_LIBRARY_PATH} ${LD_LIBRARY_PATH}
{% else %}
# SGLang dev/local-dev inherit the upstream SGLang/NIXL runtime stack.
{% endif %}
# Copy shell profile script for framework-specific environment variables # Copy shell profile script for framework-specific environment variables
# This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists # This script conditionally adds PATH/LD_LIBRARY_PATH entries based on what exists
......
...@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca ...@@ -40,8 +40,10 @@ RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/loca
{% endif %} {% endif %}
{% if target not in ("dev", "local-dev") %} {% if target not in ("dev", "local-dev") %}
# Runtime target installs the prebuilt Dynamo wheels. Dev/local-dev build from # Runtime target installs only the prebuilt Dynamo wheels. SGLang and its NIXL
# source later in the shared dev stage after the workspace is bind-mounted. # packages come from the upstream lmsysorg/sglang runtime image; --no-deps keeps
# pip from replacing that stack. Dev/local-dev build from source later in the
# shared dev stage after the workspace is bind-mounted.
COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/ COPY --chmod=775 --chown=dynamo:0 --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \ RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
......
...@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -495,6 +495,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
################################## ##################################
##### wheel_builder ############## ##### wheel_builder ##############
################################## ##################################
{% if "nixl_ref" in context[framework] %}
# Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels. # Builds nixl (native + Python wheel) and kvbm wheel, then consolidates all wheels.
# Runtime templates COPY from this stage. # Runtime templates COPY from this stage.
...@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token ...@@ -626,3 +627,11 @@ RUN --mount=type=secret,id=aws-web-identity-token,target=/run/secrets/aws-token
# Consolidate all wheels from the runtime wheel builder stage # Consolidate all wheels from the runtime wheel builder stage
COPY --from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/ COPY --from=runtime_wheel_builder /opt/dynamo/dist/ /opt/dynamo/dist/
{% else %}
# SGLang uses NIXL from the upstream lmsysorg/sglang runtime image and does not
# build Dynamo KVBM. Keep this alias so downstream stages can still COPY Dynamo
# wheels and build tools from a common wheel_builder stage name.
# SGLang dev/source builds may link nixl-sys against stubs when native NIXL is
# absent; block-manager/KVBM runtime work should use vllm/trtllm/none images.
FROM runtime_wheel_builder AS wheel_builder
{% endif %}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment