feat: migrate SGLang configuration (#6280)

Signed-off-by: jh-nv <jihao@nvidia.com>

feat: migrate SGLang configuration (#6280)
Signed-off-by: jh-nv <jihao@nvidia.com>
f4f82762 · jh-nv · GitHub · 359765d3 · f4f82762 · f4f82762
Unverified Commit f4f82762 authored Feb 14, 2026 by jh-nv Committed by GitHub Feb 14, 2026
7 changed files
--- a/components/src/dynamo/common/configuration/groups/runtime_args.py
+++ b/components/src/dynamo/common/configuration/groups/runtime_args.py
@@ -15,6 +15,7 @@ class DynamoRuntimeConfig(ConfigBase):
    """Configuration for Dynamo runtime (common across all backends)."""

    namespace: str
+    endpoint: Optional[str] = None
    discovery_backend: str
    request_plane: str
    event_plane: str
@@ -52,6 +53,13 @@ class DynamoRuntimeArgGroup(ArgGroup):
            default="dynamo",
            help="Dynamo namespace",
        )
+        add_argument(
+            g,
+            flag_name="--endpoint",
+            env_var="DYN_ENDPOINT",
+            default=None,
+            help="Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Example: dyn://dynamo.backend.generate. Currently used only by TRT-LLM and SGLang backends.",
+        )
        add_argument(
            g,
            flag_name="--discovery-backend",

--- a/components/src/dynamo/sglang/args.py
+++ b/components/src/dynamo/sglang/args.py
--- a/components/src/dynamo/sglang/backend_args.py
+++ b/components/src/dynamo/sglang/backend_args.py
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Dynamo SGLang wrapper configuration ArgGroup."""
+
+from typing import Optional
+
+from dynamo.common.configuration.arg_group import ArgGroup
+from dynamo.common.configuration.config_base import ConfigBase
+from dynamo.common.configuration.utils import add_argument, add_negatable_bool_argument
+
+from . import __version__
+
+
+class DynamoSGLangArgGroup(ArgGroup):
+    """SGLang-specific Dynamo wrapper configuration (not native SGLang engine args).
+
+    Declares the command-line options that select which worker role the
+    wrapper runs as, plus the settings tied to those roles. Every option is
+    declared together with an environment variable name and a default.
+    """
+
+    # Identifier of this group; how it is consumed is defined by ArgGroup (not shown here).
+    name = "dynamo-sglang"
+
+    def add_arguments(self, parser) -> None:
+        """Add Dynamo SGLang arguments to parser.
+
+        Registers a ``--version`` action directly on ``parser`` and then a
+        "Dynamo SGLang Options" argument group that holds all other options.
+
+        Args:
+            parser: Parser to extend. It must provide ``add_argument`` and
+                ``add_argument_group`` (presumably an
+                ``argparse.ArgumentParser`` -- confirm against callers).
+        """
+
+        # --version lives on the parser itself rather than in the option group below.
+        parser.add_argument(
+            "--version",
+            action="version",
+            version=f"Dynamo Backend SGLang {__version__}",
+        )
+
+        g = parser.add_argument_group("Dynamo SGLang Options")
+
+        # Tokenization mode.
+        add_negatable_bool_argument(
+            g,
+            flag_name="--use-sglang-tokenizer",
+            env_var="DYN_SGL_USE_TOKENIZER",
+            default=False,
+            help="Use SGLang's tokenizer for pre and post processing. This bypasses Dynamo's preprocessor and only v1/chat/completions will be available through the Dynamo frontend. Cannot be used with --custom-jinja-template.",
+        )
+
+        # Multimodal component roles; each flag defaults to False.
+        add_negatable_bool_argument(
+            g,
+            flag_name="--multimodal-processor",
+            env_var="DYN_SGL_MULTIMODAL_PROCESSOR",
+            default=False,
+            help="Run as multimodal processor component for handling multimodal requests.",
+        )
+        add_negatable_bool_argument(
+            g,
+            flag_name="--multimodal-encode-worker",
+            env_var="DYN_SGL_MULTIMODAL_ENCODE_WORKER",
+            default=False,
+            help="Run as multimodal encode worker component for processing images/videos.",
+        )
+        add_negatable_bool_argument(
+            g,
+            flag_name="--multimodal-worker",
+            env_var="DYN_SGL_MULTIMODAL_WORKER",
+            default=False,
+            help="Run as multimodal worker component for LLM inference with multimodal data.",
+        )
+
+        # Embedding role.
+        add_negatable_bool_argument(
+            g,
+            flag_name="--embedding-worker",
+            env_var="DYN_SGL_EMBEDDING_WORKER",
+            default=False,
+            help="Run as embedding worker component (Dynamo flag, also sets SGLang's --is-embedding).",
+        )
+
+        # Image diffusion role, plus where generated images are stored and how their URLs are rewritten.
+        add_negatable_bool_argument(
+            g,
+            flag_name="--image-diffusion-worker",
+            env_var="DYN_SGL_IMAGE_DIFFUSION_WORKER",
+            default=False,
+            help="Run as image diffusion worker for image generation.",
+        )
+        add_argument(
+            g,
+            flag_name="--image-diffusion-fs-url",
+            env_var="DYN_SGL_IMAGE_DIFFUSION_FS_URL",
+            default=None,
+            help="Filesystem URL for storing generated images using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
+        )
+        # Unlike the other string options, this one has a non-None default.
+        add_argument(
+            g,
+            flag_name="--image-diffusion-base-url",
+            env_var="DYN_SGL_IMAGE_DIFFUSION_BASE_URL",
+            default="http://localhost:8008/",
+            help="Base URL for rewriting image URLs in responses (e.g., http://localhost:8008/). When set, generated image URLs will use this base instead of filesystem URLs.",
+        )
+        # Disaggregation config file and the key selecting one of its sections.
+        # DynamoSGLangConfig.validate requires both or neither to be set.
+        add_argument(
+            g,
+            flag_name="--disagg-config",
+            env_var="DYN_SGL_DISAGG_CONFIG",
+            default=None,
+            help="Disaggregation configuration file in YAML format.",
+        )
+        add_argument(
+            g,
+            flag_name="--disagg-config-key",
+            env_var="DYN_SGL_DISAGG_CONFIG_KEY",
+            default=None,
+            help="Key to select from nested disaggregation configuration file (e.g., 'prefill', 'decode').",
+        )
+        # Video generation role and where generated videos are stored.
+        add_negatable_bool_argument(
+            g,
+            flag_name="--video-generation-worker",
+            env_var="DYN_SGL_VIDEO_GENERATION_WORKER",
+            default=False,
+            help="Run as video generation worker for video generation (T2V/I2V).",
+        )
+        add_argument(
+            g,
+            flag_name="--video-generation-fs-url",
+            env_var="DYN_SGL_VIDEO_GENERATION_FS_URL",
+            default=None,
+            help="Filesystem URL for storing generated videos using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
+        )
+
+
+class DynamoSGLangConfig(ConfigBase):
+    """Typed holder for the Dynamo SGLang wrapper options (SGLang-specific only)."""
+
+    # Tokenization mode.
+    use_sglang_tokenizer: bool
+    # Worker-role switches.
+    multimodal_processor: bool
+    multimodal_encode_worker: bool
+    multimodal_worker: bool
+    embedding_worker: bool
+    image_diffusion_worker: bool
+    # Image diffusion storage / URL rewriting.
+    image_diffusion_fs_url: Optional[str] = None
+    image_diffusion_base_url: Optional[str] = None
+
+    # Disaggregation config file and the key selecting a section of it.
+    disagg_config: Optional[str] = None
+    disagg_config_key: Optional[str] = None
+
+    # Video generation role and storage.
+    video_generation_worker: bool
+    video_generation_fs_url: Optional[str] = None
+
+    def validate(self) -> None:
+        """Check that the disaggregation config and its key are set together.
+
+        Raises:
+            ValueError: If exactly one of ``disagg_config`` and
+                ``disagg_config_key`` is set.
+        """
+        has_config = self.disagg_config is not None
+        has_key = self.disagg_config_key is not None
+        # Both set or both unset is fine; only a lone value is rejected.
+        if has_config == has_key:
+            return
+        raise ValueError(
+            "Both 'disagg_config' and 'disagg_config_key' must be provided together."
+        )
--- a/components/src/dynamo/sglang/main.py
+++ b/components/src/dynamo/sglang/main.py
@@ -285,13 +285,8 @@ async def init(
        engine, use_text_input=dynamo_args.use_sglang_tokenizer
    ).to_dict()

-    logging.info(
-        f"Registering model with endpoint types: {dynamo_args.dyn_endpoint_types}"
-    )
-    if (
-        dynamo_args.custom_jinja_template
-        and "chat" not in dynamo_args.dyn_endpoint_types
-    ):
+    logging.info(f"Registering model with endpoint types: {dynamo_args.endpoint_types}")
+    if dynamo_args.custom_jinja_template and "chat" not in dynamo_args.endpoint_types:
        logging.warning(
            "Custom Jinja template provided (--custom-jinja-template) but 'chat' not in --dyn-endpoint-types. "
            "The chat template will be loaded but the /v1/chat/completions endpoint will not be available."
@@ -312,7 +307,7 @@ async def init(
                generate_endpoint,
                server_args,
                dynamo_args,
-                output_type=parse_endpoint_types(dynamo_args.dyn_endpoint_types),
+                output_type=parse_endpoint_types(dynamo_args.endpoint_types),
                readiness_gate=ready_event,
            ),
        )
@@ -462,7 +457,7 @@ async def init_diffusion(
    ).to_dict()

    logging.info(
-        f"Registering diffusion model with endpoint types: {dynamo_args.dyn_endpoint_types}"
+        f"Registering diffusion model with endpoint types: {dynamo_args.endpoint_types}"
    )

    try:
@@ -479,7 +474,7 @@ async def init_diffusion(
                generate_endpoint,
                server_args,
                dynamo_args,
-                output_type=parse_endpoint_types(dynamo_args.dyn_endpoint_types),
+                output_type=parse_endpoint_types(dynamo_args.endpoint_types),
                readiness_gate=ready_event,
            ),
        )

--- a/components/src/dynamo/sglang/register.py
+++ b/components/src/dynamo/sglang/register.py
@@ -12,14 +12,14 @@ from sglang.srt.utils import get_local_ip_auto

 from dynamo._core import Endpoint
 from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model
-from dynamo.sglang.args import DynamoArgs
+from dynamo.sglang.args import DynamoConfig


 async def _register_model_with_runtime_config(
    engine: sgl.Engine,
    endpoint: Endpoint,
    server_args: ServerArgs,
-    dynamo_args: DynamoArgs,
+    dynamo_args: DynamoConfig,
    input_type: Optional[ModelInput] = ModelInput.Tokens,
    output_type: Optional[ModelType] = ModelType.Chat | ModelType.Completions,
 ) -> bool:
@@ -144,7 +144,7 @@ def _get_bootstrap_info_for_config(


 async def _get_runtime_config(
-    engine: sgl.Engine, server_args: ServerArgs, dynamo_args: DynamoArgs
+    engine: sgl.Engine, server_args: ServerArgs, dynamo_args: DynamoConfig
 ) -> Optional[ModelRuntimeConfig]:
    """Extract runtime configuration from SGLang engine and args.

@@ -158,8 +158,8 @@ async def _get_runtime_config(
    """
    runtime_config = ModelRuntimeConfig()
    # set reasoning parser and tool call parser
-    runtime_config.reasoning_parser = dynamo_args.reasoning_parser
-    runtime_config.tool_call_parser = dynamo_args.tool_call_parser
+    runtime_config.reasoning_parser = dynamo_args.dyn_reasoning_parser
+    runtime_config.tool_call_parser = dynamo_args.dyn_tool_call_parser
    # Decode workers don't create the WorkerKvQuery endpoint, so don't advertise local indexer
    is_decode_worker = server_args.disaggregation_mode == "decode"
    runtime_config.enable_local_indexer = (
@@ -235,7 +235,7 @@ async def register_model_with_readiness_gate(
    engine: sgl.Engine,
    generate_endpoint: Endpoint,
    server_args: ServerArgs,
-    dynamo_args: DynamoArgs,
+    dynamo_args: DynamoConfig,
    input_type: Optional[ModelInput] = ModelInput.Tokens,
    output_type: Optional[ModelType] = ModelType.Chat | ModelType.Completions,
    readiness_gate: Optional[asyncio.Event] = None,

--- a/components/src/dynamo/sglang/tests/test_sglang_unit.py
+++ b/components/src/dynamo/sglang/tests/test_sglang_unit.py
@@ -8,6 +8,7 @@ import sys
 from pathlib import Path

 import pytest
+import yaml

 from dynamo.sglang.args import parse_args
 from dynamo.sglang.tests.conftest import make_cli_args_fixture
@@ -92,7 +93,7 @@ async def test_tool_call_parser_valid_with_dynamo_tokenizer(mock_sglang_cli):

    config = await parse_args(sys.argv[1:])

-    assert config.dynamo_args.tool_call_parser == "hermes"
+    assert config.dynamo_args.dyn_tool_call_parser == "hermes"


 @pytest.mark.asyncio
@@ -120,3 +121,147 @@ async def test_tool_call_parser_both_flags_error(mock_sglang_cli):

    with pytest.raises(SystemExit):
        await parse_args(sys.argv[1:])
+
+
+@pytest.mark.asyncio
+async def test_namespace_flag_drives_default_endpoint_namespace(mock_sglang_cli):
+    """--namespace given on the CLI should be exposed as dynamo_args.namespace."""
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--namespace",
+        "custom-ns",
+    )
+
+    config = await parse_args(sys.argv[1:])
+    assert config.dynamo_args.namespace == "custom-ns"  # NOTE(review): the derived endpoint itself is not asserted
+
+
+@pytest.mark.asyncio
+async def test_obsolete_dyn_endpoint_types_flag_is_supported(mock_sglang_cli):
+    """The obsolete --dyn-endpoint-types flag should populate dynamo_args.endpoint_types."""
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--dyn-endpoint-types",  # old spelling of the flag
+        "completions",
+    )
+
+    config = await parse_args(sys.argv[1:])
+    assert config.dynamo_args.endpoint_types == "completions"
+
+
+@pytest.mark.asyncio
+async def test_disagg_config_requires_disagg_config_key(mock_sglang_cli):
+    """--disagg-config without --disagg-config-key should raise ValueError."""
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--disagg-config",
+        "/tmp/nonexistent.yaml",  # presumably never opened because validation fails first -- confirm
+    )
+
+    with pytest.raises(ValueError, match="disagg_config.*disagg_config_key.*together"):
+        await parse_args(sys.argv[1:])
+
+
+@pytest.mark.asyncio
+async def test_disagg_config_key_requires_disagg_config(mock_sglang_cli):
+    """--disagg-config-key without --disagg-config should raise ValueError."""
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--disagg-config-key",
+        "prefill",
+    )
+
+    with pytest.raises(ValueError, match="disagg_config.*disagg_config_key.*together"):
+        await parse_args(sys.argv[1:])
+
+
+@pytest.mark.asyncio
+async def test_disagg_config_key_not_found_error(tmp_path, mock_sglang_cli):
+    """Selecting a key that is absent from the disagg YAML should raise a clear ValueError."""
+    config_path = tmp_path / "disagg.yaml"
+    config_path.write_text(
+        yaml.safe_dump({"prefill": {"tensor_parallel_size": 1}}), encoding="utf-8"
+    )
+
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--disagg-config",
+        str(config_path),
+        "--disagg-config-key",
+        "decode",  # not a key of the YAML written above
+    )
+
+    with pytest.raises(ValueError, match="Disagg config key 'decode' not found"):
+        await parse_args(sys.argv[1:])
+
+
+@pytest.mark.asyncio
+async def test_disagg_config_section_must_be_dict(tmp_path, mock_sglang_cli):
+    """The selected disagg section must be a dictionary; a scalar should raise ValueError."""
+    config_path = tmp_path / "disagg.yaml"
+    config_path.write_text(yaml.safe_dump({"prefill": "not-a-dict"}), encoding="utf-8")
+
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--disagg-config",
+        str(config_path),
+        "--disagg-config-key",
+        "prefill",  # present in the YAML, but its value is a string
+    )
+
+    with pytest.raises(
+        ValueError, match="Disagg config section 'prefill' must be a dictionary"
+    ):
+        await parse_args(sys.argv[1:])
+
+
+@pytest.mark.asyncio
+async def test_disagg_config_preserves_bootstrap_port(tmp_path, mock_sglang_cli):
+    """Bootstrap port from the disagg section should survive parsing (not be replaced by auto-port logic)."""
+    config_path = tmp_path / "disagg.yaml"
+    config_path.write_text(
+        yaml.safe_dump({"prefill": {"disaggregation-bootstrap-port": 42345}}),
+        encoding="utf-8",
+    )
+
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--disagg-config",
+        str(config_path),
+        "--disagg-config-key",
+        "prefill",
+    )
+
+    config = await parse_args(sys.argv[1:])
+    assert config.server_args.disaggregation_bootstrap_port == 42345  # same value as written to the YAML
+
+
+@pytest.mark.asyncio
+async def test_disagg_config_rejects_dynamo_keys(tmp_path, mock_sglang_cli, capfd):
+    """A non-SGLang key in the disagg section should be rejected as an unrecognized argument."""
+    config_path = tmp_path / "disagg.yaml"
+    config_path.write_text(
+        yaml.safe_dump({"prefill": {"store-kv": "mem"}}), encoding="utf-8"
+    )
+
+    mock_sglang_cli(
+        "--model",
+        "Qwen/Qwen3-0.6B",
+        "--disagg-config",
+        str(config_path),
+        "--disagg-config-key",
+        "prefill",
+    )
+
+    with pytest.raises(SystemExit):
+        await parse_args(sys.argv[1:])
+
+    err = capfd.readouterr().err  # only stderr is inspected; stdout is not needed
+    assert "unrecognized arguments: --store-kv mem" in err
--- a/components/src/dynamo/vllm/args.py
+++ b/components/src/dynamo/vllm/args.py
@@ -33,7 +33,6 @@ VALID_CONNECTORS = {"nixl", "lmcache", "kvbm", "null", "none"}

 class Config(DynamoRuntimeConfig, DynamoVllmConfig):
    component: str
-    endpoint: str
    is_prefill_worker: bool
    is_decode_worker: bool
    custom_jinja_template: Optional[str] = None