Unverified Commit f4f82762 authored by jh-nv's avatar jh-nv Committed by GitHub
Browse files

feat: migrate SGLang configuration (#6280)


Signed-off-by: jh-nv <jihao@nvidia.com>
parent 359765d3
......@@ -15,6 +15,7 @@ class DynamoRuntimeConfig(ConfigBase):
"""Configuration for Dynamo runtime (common across all backends)."""
namespace: str
endpoint: Optional[str] = None
discovery_backend: str
request_plane: str
event_plane: str
......@@ -52,6 +53,13 @@ class DynamoRuntimeArgGroup(ArgGroup):
default="dynamo",
help="Dynamo namespace",
)
add_argument(
g,
flag_name="--endpoint",
env_var="DYN_ENDPOINT",
default=None,
help="Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Example: dyn://dynamo.backend.generate. Currently used only by TRT-LLM and SGLang backends.",
)
add_argument(
g,
flag_name="--discovery-backend",
......
This diff is collapsed.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Dynamo SGLang wrapper configuration ArgGroup."""
from typing import Optional
from dynamo.common.configuration.arg_group import ArgGroup
from dynamo.common.configuration.config_base import ConfigBase
from dynamo.common.configuration.utils import add_argument, add_negatable_bool_argument
from . import __version__
class DynamoSGLangArgGroup(ArgGroup):
    """Argument group for the Dynamo wrapper around SGLang.

    These are wrapper-level options, not native SGLang engine arguments.
    """

    name = "dynamo-sglang"

    def add_arguments(self, parser) -> None:
        """Register ``--version`` and the "Dynamo SGLang Options" group on *parser*.

        Every option is registered through ``add_argument`` or
        ``add_negatable_bool_argument`` together with an environment variable
        name and a default value.
        """
        parser.add_argument(
            "--version",
            action="version",
            version=f"Dynamo Backend SGLang {__version__}",
        )
        group = parser.add_argument_group("Dynamo SGLang Options")

        # One row per option: (registrar, flag, env var, default, help).
        # Rows are kept in registration order so the generated help output
        # lists the options in the same sequence as before.
        option_specs = (
            (
                add_negatable_bool_argument,
                "--use-sglang-tokenizer",
                "DYN_SGL_USE_TOKENIZER",
                False,
                "Use SGLang's tokenizer for pre and post processing. This bypasses Dynamo's preprocessor and only v1/chat/completions will be available through the Dynamo frontend. Cannot be used with --custom-jinja-template.",
            ),
            (
                add_negatable_bool_argument,
                "--multimodal-processor",
                "DYN_SGL_MULTIMODAL_PROCESSOR",
                False,
                "Run as multimodal processor component for handling multimodal requests.",
            ),
            (
                add_negatable_bool_argument,
                "--multimodal-encode-worker",
                "DYN_SGL_MULTIMODAL_ENCODE_WORKER",
                False,
                "Run as multimodal encode worker component for processing images/videos.",
            ),
            (
                add_negatable_bool_argument,
                "--multimodal-worker",
                "DYN_SGL_MULTIMODAL_WORKER",
                False,
                "Run as multimodal worker component for LLM inference with multimodal data.",
            ),
            (
                add_negatable_bool_argument,
                "--embedding-worker",
                "DYN_SGL_EMBEDDING_WORKER",
                False,
                "Run as embedding worker component (Dynamo flag, also sets SGLang's --is-embedding).",
            ),
            (
                add_negatable_bool_argument,
                "--image-diffusion-worker",
                "DYN_SGL_IMAGE_DIFFUSION_WORKER",
                False,
                "Run as image diffusion worker for image generation.",
            ),
            (
                add_argument,
                "--image-diffusion-fs-url",
                "DYN_SGL_IMAGE_DIFFUSION_FS_URL",
                None,
                "Filesystem URL for storing generated images using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
            ),
            (
                add_argument,
                "--image-diffusion-base-url",
                "DYN_SGL_IMAGE_DIFFUSION_BASE_URL",
                "http://localhost:8008/",
                "Base URL for rewriting image URLs in responses (e.g., http://localhost:8008/). When set, generated image URLs will use this base instead of filesystem URLs.",
            ),
            (
                add_argument,
                "--disagg-config",
                "DYN_SGL_DISAGG_CONFIG",
                None,
                "Disaggregation configuration file in YAML format.",
            ),
            (
                add_argument,
                "--disagg-config-key",
                "DYN_SGL_DISAGG_CONFIG_KEY",
                None,
                "Key to select from nested disaggregation configuration file (e.g., 'prefill', 'decode').",
            ),
            (
                add_negatable_bool_argument,
                "--video-generation-worker",
                "DYN_SGL_VIDEO_GENERATION_WORKER",
                False,
                "Run as video generation worker for video generation (T2V/I2V).",
            ),
            (
                add_argument,
                "--video-generation-fs-url",
                "DYN_SGL_VIDEO_GENERATION_FS_URL",
                None,
                "Filesystem URL for storing generated videos using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
            ),
        )

        for register, flag, env_name, default_value, help_text in option_specs:
            register(
                group,
                flag_name=flag,
                env_var=env_name,
                default=default_value,
                help=help_text,
            )
class DynamoSGLangConfig(ConfigBase):
    """Holds the SGLang-specific settings of the Dynamo SGLang wrapper."""

    # Boolean switches (tokenizer choice and worker-role flags).
    use_sglang_tokenizer: bool
    multimodal_processor: bool
    multimodal_encode_worker: bool
    multimodal_worker: bool
    embedding_worker: bool
    image_diffusion_worker: bool
    # Image diffusion URLs.
    image_diffusion_fs_url: Optional[str] = None
    image_diffusion_base_url: Optional[str] = None
    # Disaggregation config file and the key selecting a section of it.
    disagg_config: Optional[str] = None
    disagg_config_key: Optional[str] = None
    # Video generation role flag and storage URL.
    video_generation_worker: bool
    video_generation_fs_url: Optional[str] = None

    def validate(self) -> None:
        """Check that the disaggregation options are given as a pair.

        Raises:
            ValueError: If exactly one of ``disagg_config`` and
                ``disagg_config_key`` is set.
        """
        has_config_file = self.disagg_config is not None
        has_config_key = self.disagg_config_key is not None
        # The two options are only meaningful together: both set or both unset.
        if has_config_file != has_config_key:
            raise ValueError(
                "Both 'disagg_config' and 'disagg_config_key' must be provided together."
            )
......@@ -285,13 +285,8 @@ async def init(
engine, use_text_input=dynamo_args.use_sglang_tokenizer
).to_dict()
logging.info(
f"Registering model with endpoint types: {dynamo_args.dyn_endpoint_types}"
)
if (
dynamo_args.custom_jinja_template
and "chat" not in dynamo_args.dyn_endpoint_types
):
logging.info(f"Registering model with endpoint types: {dynamo_args.endpoint_types}")
if dynamo_args.custom_jinja_template and "chat" not in dynamo_args.endpoint_types:
logging.warning(
"Custom Jinja template provided (--custom-jinja-template) but 'chat' not in --dyn-endpoint-types. "
"The chat template will be loaded but the /v1/chat/completions endpoint will not be available."
......@@ -312,7 +307,7 @@ async def init(
generate_endpoint,
server_args,
dynamo_args,
output_type=parse_endpoint_types(dynamo_args.dyn_endpoint_types),
output_type=parse_endpoint_types(dynamo_args.endpoint_types),
readiness_gate=ready_event,
),
)
......@@ -462,7 +457,7 @@ async def init_diffusion(
).to_dict()
logging.info(
f"Registering diffusion model with endpoint types: {dynamo_args.dyn_endpoint_types}"
f"Registering diffusion model with endpoint types: {dynamo_args.endpoint_types}"
)
try:
......@@ -479,7 +474,7 @@ async def init_diffusion(
generate_endpoint,
server_args,
dynamo_args,
output_type=parse_endpoint_types(dynamo_args.dyn_endpoint_types),
output_type=parse_endpoint_types(dynamo_args.endpoint_types),
readiness_gate=ready_event,
),
)
......
......@@ -12,14 +12,14 @@ from sglang.srt.utils import get_local_ip_auto
from dynamo._core import Endpoint
from dynamo.llm import ModelInput, ModelRuntimeConfig, ModelType, register_model
from dynamo.sglang.args import DynamoArgs
from dynamo.sglang.args import DynamoConfig
async def _register_model_with_runtime_config(
engine: sgl.Engine,
endpoint: Endpoint,
server_args: ServerArgs,
dynamo_args: DynamoArgs,
dynamo_args: DynamoConfig,
input_type: Optional[ModelInput] = ModelInput.Tokens,
output_type: Optional[ModelType] = ModelType.Chat | ModelType.Completions,
) -> bool:
......@@ -144,7 +144,7 @@ def _get_bootstrap_info_for_config(
async def _get_runtime_config(
engine: sgl.Engine, server_args: ServerArgs, dynamo_args: DynamoArgs
engine: sgl.Engine, server_args: ServerArgs, dynamo_args: DynamoConfig
) -> Optional[ModelRuntimeConfig]:
"""Extract runtime configuration from SGLang engine and args.
......@@ -158,8 +158,8 @@ async def _get_runtime_config(
"""
runtime_config = ModelRuntimeConfig()
# set reasoning parser and tool call parser
runtime_config.reasoning_parser = dynamo_args.reasoning_parser
runtime_config.tool_call_parser = dynamo_args.tool_call_parser
runtime_config.reasoning_parser = dynamo_args.dyn_reasoning_parser
runtime_config.tool_call_parser = dynamo_args.dyn_tool_call_parser
# Decode workers don't create the WorkerKvQuery endpoint, so don't advertise local indexer
is_decode_worker = server_args.disaggregation_mode == "decode"
runtime_config.enable_local_indexer = (
......@@ -235,7 +235,7 @@ async def register_model_with_readiness_gate(
engine: sgl.Engine,
generate_endpoint: Endpoint,
server_args: ServerArgs,
dynamo_args: DynamoArgs,
dynamo_args: DynamoConfig,
input_type: Optional[ModelInput] = ModelInput.Tokens,
output_type: Optional[ModelType] = ModelType.Chat | ModelType.Completions,
readiness_gate: Optional[asyncio.Event] = None,
......
......@@ -8,6 +8,7 @@ import sys
from pathlib import Path
import pytest
import yaml
from dynamo.sglang.args import parse_args
from dynamo.sglang.tests.conftest import make_cli_args_fixture
......@@ -92,7 +93,7 @@ async def test_tool_call_parser_valid_with_dynamo_tokenizer(mock_sglang_cli):
config = await parse_args(sys.argv[1:])
assert config.dynamo_args.tool_call_parser == "hermes"
assert config.dynamo_args.dyn_tool_call_parser == "hermes"
@pytest.mark.asyncio
......@@ -120,3 +121,147 @@ async def test_tool_call_parser_both_flags_error(mock_sglang_cli):
with pytest.raises(SystemExit):
await parse_args(sys.argv[1:])
@pytest.mark.asyncio
async def test_namespace_flag_drives_default_endpoint_namespace(mock_sglang_cli):
    """The --namespace CLI value should end up in dynamo_args.namespace."""
    cli_flags = ["--model", "Qwen/Qwen3-0.6B", "--namespace", "custom-ns"]
    mock_sglang_cli(*cli_flags)

    parsed = await parse_args(sys.argv[1:])

    assert parsed.dynamo_args.namespace == "custom-ns"
@pytest.mark.asyncio
async def test_obsolete_dyn_endpoint_types_flag_is_supported(mock_sglang_cli):
    """The obsolete --dyn-endpoint-types flag should populate endpoint_types."""
    cli_flags = ["--model", "Qwen/Qwen3-0.6B", "--dyn-endpoint-types", "completions"]
    mock_sglang_cli(*cli_flags)

    parsed = await parse_args(sys.argv[1:])

    assert parsed.dynamo_args.endpoint_types == "completions"
@pytest.mark.asyncio
async def test_disagg_config_requires_disagg_config_key(mock_sglang_cli):
    """Passing --disagg-config without --disagg-config-key should raise ValueError."""
    cli_flags = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--disagg-config",
        "/tmp/nonexistent.yaml",
    ]
    mock_sglang_cli(*cli_flags)

    expected_message = "disagg_config.*disagg_config_key.*together"
    with pytest.raises(ValueError, match=expected_message):
        await parse_args(sys.argv[1:])
@pytest.mark.asyncio
async def test_disagg_config_key_requires_disagg_config(mock_sglang_cli):
    """Passing --disagg-config-key without --disagg-config should raise ValueError."""
    cli_flags = ["--model", "Qwen/Qwen3-0.6B", "--disagg-config-key", "prefill"]
    mock_sglang_cli(*cli_flags)

    expected_message = "disagg_config.*disagg_config_key.*together"
    with pytest.raises(ValueError, match=expected_message):
        await parse_args(sys.argv[1:])
@pytest.mark.asyncio
async def test_disagg_config_key_not_found_error(tmp_path, mock_sglang_cli):
    """Selecting a key that is absent from the disagg YAML should raise ValueError."""
    # The file only defines a "prefill" section; the CLI asks for "decode".
    yaml_file = tmp_path / "disagg.yaml"
    sections = {"prefill": {"tensor_parallel_size": 1}}
    yaml_file.write_text(yaml.safe_dump(sections), encoding="utf-8")

    cli_flags = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--disagg-config",
        str(yaml_file),
        "--disagg-config-key",
        "decode",
    ]
    mock_sglang_cli(*cli_flags)

    expected_message = "Disagg config key 'decode' not found"
    with pytest.raises(ValueError, match=expected_message):
        await parse_args(sys.argv[1:])
@pytest.mark.asyncio
async def test_disagg_config_section_must_be_dict(tmp_path, mock_sglang_cli):
    """A selected disagg section that is not a mapping should raise ValueError."""
    # "prefill" maps to a plain string rather than a dictionary.
    yaml_file = tmp_path / "disagg.yaml"
    sections = {"prefill": "not-a-dict"}
    yaml_file.write_text(yaml.safe_dump(sections), encoding="utf-8")

    cli_flags = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--disagg-config",
        str(yaml_file),
        "--disagg-config-key",
        "prefill",
    ]
    mock_sglang_cli(*cli_flags)

    expected_message = "Disagg config section 'prefill' must be a dictionary"
    with pytest.raises(ValueError, match=expected_message):
        await parse_args(sys.argv[1:])
@pytest.mark.asyncio
async def test_disagg_config_preserves_bootstrap_port(tmp_path, mock_sglang_cli):
    """The bootstrap port set in the disagg section should reach server_args unchanged."""
    yaml_file = tmp_path / "disagg.yaml"
    sections = {"prefill": {"disaggregation-bootstrap-port": 42345}}
    yaml_file.write_text(yaml.safe_dump(sections), encoding="utf-8")

    cli_flags = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--disagg-config",
        str(yaml_file),
        "--disagg-config-key",
        "prefill",
    ]
    mock_sglang_cli(*cli_flags)

    parsed = await parse_args(sys.argv[1:])

    assert parsed.server_args.disaggregation_bootstrap_port == 42345
@pytest.mark.asyncio
async def test_disagg_config_rejects_dynamo_keys(tmp_path, mock_sglang_cli, capfd):
    """A "store-kv" key in the disagg section should be rejected as an unrecognized argument."""
    yaml_file = tmp_path / "disagg.yaml"
    sections = {"prefill": {"store-kv": "mem"}}
    yaml_file.write_text(yaml.safe_dump(sections), encoding="utf-8")

    cli_flags = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--disagg-config",
        str(yaml_file),
        "--disagg-config-key",
        "prefill",
    ]
    mock_sglang_cli(*cli_flags)

    with pytest.raises(SystemExit):
        await parse_args(sys.argv[1:])

    # The parser error is expected on stderr.
    captured = capfd.readouterr()
    assert "unrecognized arguments: --store-kv mem" in captured.err
......@@ -33,7 +33,6 @@ VALID_CONNECTORS = {"nixl", "lmcache", "kvbm", "null", "none"}
class Config(DynamoRuntimeConfig, DynamoVllmConfig):
component: str
endpoint: str
is_prefill_worker: bool
is_decode_worker: bool
custom_jinja_template: Optional[str] = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment