Unverified Commit 2cd4288a authored by Hongkuan Zhou's avatar Hongkuan Zhou Committed by GitHub
Browse files

fix(planner): match MDC component field against backend default, not DGD key (#8489)


Signed-off-by: default avatarhongkuanz <hongkuanz@nvidia.com>
parent dfd3aa99
......@@ -388,19 +388,38 @@ class KubernetesConnector(PlannerConnector):
) -> tuple[Optional[str], str]:
"""Return (dgd_service_name, component_name_for_filter).
Uses the DGD when available; otherwise falls back to backend defaults
so that stale/missing DGD state still lets us filter out LoRA cards
by their expected component name.
``dgd_service_name`` is the DGD ``spec.services`` dict key (typically
PascalCase, e.g. ``"VllmPrefillWorker"``) and is used for Kubernetes
operations like patching replica counts.
``component_name_for_filter`` is the component name that the Rust
runtime registers via ``Endpoint`` and writes into the MDC
``component`` field. Source of truth, in priority order:
1. The user's ``--endpoint <ns>.<component>.<ep>`` override in the
worker's container args (supported by all backends --
see vllm/args.py:171-176, sglang/args.py:428, trtllm/args.py:137).
2. The backend-specific default from
:func:`build_worker_info_from_defaults` (e.g. ``"prefill"`` /
``"backend"`` / ``"tensorrt_llm"``).
Note: the DGD services dict key (``service.name``) must NOT be used
here -- it is typically PascalCase (``"VllmPrefillWorker"``) and
would never match the lowercase value the worker writes to MDC.
"""
defaults = build_worker_info_from_defaults(backend, sub_component_type)
expected_component = defaults.component_name or ""
try:
deployment = self.kube_api.get_graph_deployment(self.graph_deployment_name)
service = get_service_from_sub_component_type_or_name(
deployment, sub_component_type
)
return service.name, service.name
user_component = service.get_component_name_from_endpoint_arg()
if user_component:
expected_component = user_component
return service.name, expected_component
except PlannerError:
defaults = build_worker_info_from_defaults(backend, sub_component_type)
return None, defaults.component_name or ""
return None, expected_component
def get_worker_info(
self,
......
......@@ -19,6 +19,7 @@ from typing import Optional
from pydantic import BaseModel
from dynamo.common.utils.runtime import parse_endpoint
from dynamo.planner.config.defaults import SubComponentType
from dynamo.planner.errors import DuplicateSubComponentError, SubComponentNotFoundError
from dynamo.runtime.logging import configure_dynamo_logging
......@@ -71,6 +72,34 @@ class Service(BaseModel):
return None
def get_component_name_from_endpoint_arg(self) -> Optional[str]:
"""Return the component name from ``--endpoint`` in the container args.
Worker backends (vLLM, SGLang, TRT-LLM) accept
``--endpoint <namespace>.<component>.<endpoint_name>`` (optionally
prefixed with ``dyn://``) which overrides the default component
name written to the MDC ``component`` field. When the user sets
this, the Planner's MDC filter must match the user's value, not
the backend default. Returns ``None`` if ``--endpoint`` is not
present or malformed.
"""
args = (
self.service.get("extraPodSpec", {})
.get("mainContainer", {})
.get("args", [])
)
args = break_arguments(args)
if "--endpoint" not in args:
return None
idx = args.index("--endpoint")
if len(args) <= idx + 1:
return None
try:
_, component, _ = parse_endpoint(args[idx + 1])
return component
except ValueError:
return None
def get_gpu_count(self) -> int:
"""Get the GPU count from the service's resource specification.
......
......@@ -1040,3 +1040,252 @@ def test_get_actual_worker_counts_no_components(kubernetes_connector, mock_kube_
assert prefill_count == 0
assert decode_count == 0
assert is_stable is True
# Tests for _resolve_dgd_service / get_worker_info component-filter.
#
# Regression: the filter that compares an MDC entry's ``component`` field
# against ``expected_component`` must use the lowercase backend-default
# name (what the Rust runtime writes to MDC), NOT the DGD ``spec.services``
# dict key. The DGD key is typically PascalCase (``VllmPrefillWorker``)
# while MDC carries the Endpoint name (``prefill`` / ``backend`` /
# ``tensorrt_llm``); returning the DGD key for the filter would cause
# every real-world MDC entry to be skipped, leaving WorkerInfo without
# ``context_length`` and silently breaking easy-mode load scaling.
def test_resolve_dgd_service_prefill_uses_backend_default_for_filter(
kubernetes_connector, mock_kube_api
):
"""vLLM prefill: filter name = "prefill" (MDC side), not DGD services key."""
mock_deployment = {
"metadata": {"name": "test-graph"},
"spec": {
"services": {
"VllmPrefillWorker": {
"replicas": 1,
"subComponentType": "prefill",
},
}
},
}
mock_kube_api.get_graph_deployment.return_value = mock_deployment
dgd_service_name, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.PREFILL, backend="vllm"
)
# k8s operations (e.g. replica patch) still target the PascalCase DGD key.
assert dgd_service_name == "VllmPrefillWorker"
# The filter side must match what the Rust runtime writes to MDC.
assert expected_component == "prefill"
def test_resolve_dgd_service_decode_uses_backend_default_for_filter(
kubernetes_connector, mock_kube_api
):
"""vLLM decode: MDC carries "backend", NOT "decode"; filter must match that."""
mock_deployment = {
"metadata": {"name": "test-graph"},
"spec": {
"services": {
"VllmDecodeWorker": {
"replicas": 1,
"subComponentType": "decode",
},
}
},
}
mock_kube_api.get_graph_deployment.return_value = mock_deployment
dgd_service_name, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.DECODE, backend="vllm"
)
assert dgd_service_name == "VllmDecodeWorker"
# Critically, vLLM's decode-worker component name is "backend" (from
# VllmComponentName.decode_worker_component_name). Using
# SubComponentType.DECODE.value ("decode") here would break decode
# filtering on every backend.
assert expected_component == "backend"
def test_resolve_dgd_service_trtllm_decode_uses_tensorrt_llm_name(
kubernetes_connector, mock_kube_api
):
"""TRT-LLM decode: MDC carries "tensorrt_llm"; filter must match."""
mock_deployment = {
"metadata": {"name": "test-graph"},
"spec": {
"services": {
"TRTLLMDecodeWorker": {
"replicas": 1,
"subComponentType": "decode",
},
}
},
}
mock_kube_api.get_graph_deployment.return_value = mock_deployment
_, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.DECODE, backend="trtllm"
)
assert expected_component == "tensorrt_llm"
def test_resolve_dgd_service_missing_dgd_still_returns_backend_default(
kubernetes_connector, mock_kube_api
):
"""When DGD lookup fails, still return the backend default for filtering."""
mock_kube_api.get_graph_deployment.side_effect = DynamoGraphDeploymentNotFoundError(
"test-graph", "default"
)
dgd_service_name, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.PREFILL, backend="vllm"
)
assert dgd_service_name is None
assert expected_component == "prefill"
def test_resolve_dgd_service_respects_user_endpoint_override(
kubernetes_connector, mock_kube_api
):
"""If the DGD passes --endpoint ns.comp.ep, the MDC filter must use 'comp'."""
mock_deployment = {
"metadata": {"name": "test-graph"},
"spec": {
"services": {
"VllmPrefillWorker": {
"replicas": 1,
"subComponentType": "prefill",
"extraPodSpec": {
"mainContainer": {
"args": [
"--endpoint",
"my-ns.my-custom-prefill.generate",
"--model",
"Qwen/Qwen3-8B",
]
}
},
},
}
},
}
mock_kube_api.get_graph_deployment.return_value = mock_deployment
dgd_service_name, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.PREFILL, backend="vllm"
)
# k8s operations still target the DGD services key.
assert dgd_service_name == "VllmPrefillWorker"
# Filter must match what the worker will actually write to MDC, which
# comes from the user's --endpoint override, not the backend default.
assert expected_component == "my-custom-prefill"
def test_resolve_dgd_service_endpoint_override_with_dyn_prefix(
kubernetes_connector, mock_kube_api
):
"""parse_endpoint accepts 'dyn://' prefix; the extracted component must strip it."""
mock_deployment = {
"metadata": {"name": "test-graph"},
"spec": {
"services": {
"VllmDecodeWorker": {
"replicas": 1,
"subComponentType": "decode",
"extraPodSpec": {
"mainContainer": {
"args": [
"--endpoint",
"dyn://ns.user-decode.generate",
]
}
},
},
}
},
}
mock_kube_api.get_graph_deployment.return_value = mock_deployment
_, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.DECODE, backend="vllm"
)
assert expected_component == "user-decode"
def test_resolve_dgd_service_malformed_endpoint_falls_back_to_default(
kubernetes_connector, mock_kube_api
):
"""Malformed --endpoint (wrong number of parts) falls back to backend default."""
mock_deployment = {
"metadata": {"name": "test-graph"},
"spec": {
"services": {
"VllmPrefillWorker": {
"replicas": 1,
"subComponentType": "prefill",
"extraPodSpec": {
"mainContainer": {
"args": ["--endpoint", "only-two.parts"],
}
},
},
}
},
}
mock_kube_api.get_graph_deployment.return_value = mock_deployment
_, expected_component = kubernetes_connector._resolve_dgd_service(
SubComponentType.PREFILL, backend="vllm"
)
assert expected_component == "prefill"
def test_service_get_component_name_from_endpoint_arg_present():
service = Service(
name="VllmPrefillWorker",
service={
"extraPodSpec": {
"mainContainer": {
"args": [
"--endpoint",
"ns.custom-comp.generate",
"--other",
"flag",
]
}
}
},
)
assert service.get_component_name_from_endpoint_arg() == "custom-comp"
def test_service_get_component_name_from_endpoint_arg_absent():
service = Service(
name="VllmPrefillWorker",
service={
"extraPodSpec": {
"mainContainer": {
"args": ["--model", "Qwen/Qwen3-8B"],
}
}
},
)
assert service.get_component_name_from_endpoint_arg() is None
def test_service_get_component_name_from_endpoint_arg_missing_value():
"""--endpoint with no following arg should return None, not raise IndexError."""
service = Service(
name="VllmPrefillWorker",
service={"extraPodSpec": {"mainContainer": {"args": ["--endpoint"]}}},
)
assert service.get_component_name_from_endpoint_arg() is None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment