Unverified commit 9352da7a, authored by Alec, committed by GitHub
Browse files

feat: add --endpoint flag support to dynamo.vllm (#6360)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent bfabb5d1
......@@ -74,7 +74,7 @@ class DynamoRuntimeArgGroup(ArgGroup):
flag_name="--endpoint",
env_var="DYN_ENDPOINT",
default=None,
help="Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Example: dyn://dynamo.backend.generate. Currently used only by TRT-LLM and SGLang backends.",
help="Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Example: dyn://dynamo.backend.generate.",
)
add_argument(
g,
......
......@@ -21,6 +21,7 @@ from dynamo.common.configuration.groups.runtime_args import (
DynamoRuntimeArgGroup,
DynamoRuntimeConfig,
)
from dynamo.common.utils.runtime import parse_endpoint
from dynamo.vllm.backend_args import DynamoVllmArgGroup, DynamoVllmConfig
from . import envs
......@@ -152,6 +153,9 @@ def update_dynamo_config_with_engine(
else:
dynamo_config.served_model_name = None
# Capture user-provided --endpoint before defaults overwrite it
user_endpoint = dynamo_config.endpoint
# TODO: move to "disaggregation_mode" as the other engines.
if dynamo_config.route_to_encoder:
dynamo_config.component = "processor"
......@@ -178,6 +182,13 @@ def update_dynamo_config_with_engine(
dynamo_config.component = "backend"
dynamo_config.endpoint = "generate"
# If user provided --endpoint, override namespace/component/endpoint
if user_endpoint is not None:
parsed_ns, parsed_comp, parsed_ep = parse_endpoint(user_endpoint)
dynamo_config.namespace = parsed_ns
dynamo_config.component = parsed_comp
dynamo_config.endpoint = parsed_ep
if dynamo_config.custom_jinja_template is not None:
expanded_template_path = os.path.expanduser(
os.path.expandvars(dynamo_config.custom_jinja_template)
......
......@@ -140,3 +140,56 @@ def test_model_express_url_none_for_default_load_format(mock_vllm_cli):
mock_vllm_cli("--model", "Qwen/Qwen3-0.6B")
config = parse_args()
assert config.model_express_url is None
# --endpoint flag tests
def test_endpoint_overrides_defaults(mock_vllm_cli):
    """A user-supplied --endpoint replaces namespace, component and endpoint."""
    cli_args = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--endpoint",
        "dyn://mynamespace.mycomponent.myendpoint",
    ]
    mock_vllm_cli(*cli_args)

    parsed = parse_args()

    # Each dotted segment of the dyn:// string should land in its own field.
    expected_fields = {
        "namespace": "mynamespace",
        "component": "mycomponent",
        "endpoint": "myendpoint",
    }
    for field_name, wanted in expected_fields.items():
        assert getattr(parsed, field_name) == wanted
def test_endpoint_not_provided_preserves_defaults(mock_vllm_cli):
    """With no --endpoint on the command line, the default triple is kept."""
    mock_vllm_cli(*["--model", "Qwen/Qwen3-0.6B"])

    parsed = parse_args()

    # Values expected when only --model is passed.
    expected_fields = {
        "namespace": "dynamo",
        "component": "backend",
        "endpoint": "generate",
    }
    for field_name, wanted in expected_fields.items():
        assert getattr(parsed, field_name) == wanted
def test_endpoint_overrides_with_prefill_worker(mock_vllm_cli):
    """--endpoint still takes effect when --is-prefill-worker is also given."""
    cli_args = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--endpoint",
        "dyn://custom.worker.serve",
        "--is-prefill-worker",
    ]
    mock_vllm_cli(*cli_args)

    parsed = parse_args()

    # The explicit endpoint must override the values chosen for this mode.
    expected_fields = {
        "namespace": "custom",
        "component": "worker",
        "endpoint": "serve",
    }
    for field_name, wanted in expected_fields.items():
        assert getattr(parsed, field_name) == wanted
def test_endpoint_invalid_format_raises(mock_vllm_cli):
    """A malformed --endpoint value makes parse_args() raise ValueError."""
    bad_cli_args = [
        "--model",
        "Qwen/Qwen3-0.6B",
        "--endpoint",
        "invalid-endpoint",
    ]
    mock_vllm_cli(*bad_cli_args)

    # The raised message must identify the endpoint format as the problem.
    with pytest.raises(ValueError, match="Invalid endpoint format"):
        parse_args()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment