Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
2cd4288a
Unverified
Commit
2cd4288a
authored
Apr 22, 2026
by
Hongkuan Zhou
Committed by
GitHub
Apr 22, 2026
Browse files
fix(planner): match MDC component field against backend default, not DGD key (#8489)
Signed-off-by:
hongkuanz
<
hongkuanz@nvidia.com
>
parent
dfd3aa99
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
303 additions
and
6 deletions
+303
-6
components/src/dynamo/planner/connectors/kubernetes.py
components/src/dynamo/planner/connectors/kubernetes.py
+25
-6
components/src/dynamo/planner/monitoring/dgd_services.py
components/src/dynamo/planner/monitoring/dgd_services.py
+29
-0
components/src/dynamo/planner/tests/unit/test_kubernetes_connector.py
...rc/dynamo/planner/tests/unit/test_kubernetes_connector.py
+249
-0
No files found.
components/src/dynamo/planner/connectors/kubernetes.py
View file @
2cd4288a
...
@@ -388,19 +388,38 @@ class KubernetesConnector(PlannerConnector):
...
@@ -388,19 +388,38 @@ class KubernetesConnector(PlannerConnector):
)
->
tuple
[
Optional
[
str
],
str
]:
)
->
tuple
[
Optional
[
str
],
str
]:
"""Return (dgd_service_name, component_name_for_filter).
"""Return (dgd_service_name, component_name_for_filter).
Uses the DGD when available; otherwise falls back to backend defaults
``dgd_service_name`` is the DGD ``spec.services`` dict key (typically
so that stale/missing DGD state still lets us filter out LoRA cards
PascalCase, e.g. ``"VllmPrefillWorker"``) and is used for Kubernetes
by their expected component name.
operations like patching replica counts.
``component_name_for_filter`` is the component name that the Rust
runtime registers via ``Endpoint`` and writes into the MDC
``component`` field. Source of truth, in priority order:
1. The user's ``--endpoint <ns>.<component>.<ep>`` override in the
worker's container args (supported by all backends --
see vllm/args.py:171-176, sglang/args.py:428, trtllm/args.py:137).
2. The backend-specific default from
:func:`build_worker_info_from_defaults` (e.g. ``"prefill"`` /
``"backend"`` / ``"tensorrt_llm"``).
Note: the DGD services dict key (``service.name``) must NOT be used
here -- it is typically PascalCase (``"VllmPrefillWorker"``) and
would never match the lowercase value the worker writes to MDC.
"""
"""
defaults
=
build_worker_info_from_defaults
(
backend
,
sub_component_type
)
expected_component
=
defaults
.
component_name
or
""
try
:
try
:
deployment
=
self
.
kube_api
.
get_graph_deployment
(
self
.
graph_deployment_name
)
deployment
=
self
.
kube_api
.
get_graph_deployment
(
self
.
graph_deployment_name
)
service
=
get_service_from_sub_component_type_or_name
(
service
=
get_service_from_sub_component_type_or_name
(
deployment
,
sub_component_type
deployment
,
sub_component_type
)
)
return
service
.
name
,
service
.
name
user_component
=
service
.
get_component_name_from_endpoint_arg
()
if
user_component
:
expected_component
=
user_component
return
service
.
name
,
expected_component
except
PlannerError
:
except
PlannerError
:
defaults
=
build_worker_info_from_defaults
(
backend
,
sub_component_type
)
return
None
,
expected_component
return
None
,
defaults
.
component_name
or
""
def
get_worker_info
(
def
get_worker_info
(
self
,
self
,
...
...
components/src/dynamo/planner/monitoring/dgd_services.py
View file @
2cd4288a
...
@@ -19,6 +19,7 @@ from typing import Optional
...
@@ -19,6 +19,7 @@ from typing import Optional
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
dynamo.common.utils.runtime
import
parse_endpoint
from
dynamo.planner.config.defaults
import
SubComponentType
from
dynamo.planner.config.defaults
import
SubComponentType
from
dynamo.planner.errors
import
DuplicateSubComponentError
,
SubComponentNotFoundError
from
dynamo.planner.errors
import
DuplicateSubComponentError
,
SubComponentNotFoundError
from
dynamo.runtime.logging
import
configure_dynamo_logging
from
dynamo.runtime.logging
import
configure_dynamo_logging
...
@@ -71,6 +72,34 @@ class Service(BaseModel):
...
@@ -71,6 +72,34 @@ class Service(BaseModel):
return
None
return
None
def
get_component_name_from_endpoint_arg
(
self
)
->
Optional
[
str
]:
"""Return the component name from ``--endpoint`` in the container args.
Worker backends (vLLM, SGLang, TRT-LLM) accept
``--endpoint <namespace>.<component>.<endpoint_name>`` (optionally
prefixed with ``dyn://``) which overrides the default component
name written to the MDC ``component`` field. When the user sets
this, the Planner's MDC filter must match the user's value, not
the backend default. Returns ``None`` if ``--endpoint`` is not
present or malformed.
"""
args
=
(
self
.
service
.
get
(
"extraPodSpec"
,
{})
.
get
(
"mainContainer"
,
{})
.
get
(
"args"
,
[])
)
args
=
break_arguments
(
args
)
if
"--endpoint"
not
in
args
:
return
None
idx
=
args
.
index
(
"--endpoint"
)
if
len
(
args
)
<=
idx
+
1
:
return
None
try
:
_
,
component
,
_
=
parse_endpoint
(
args
[
idx
+
1
])
return
component
except
ValueError
:
return
None
def
get_gpu_count
(
self
)
->
int
:
def
get_gpu_count
(
self
)
->
int
:
"""Get the GPU count from the service's resource specification.
"""Get the GPU count from the service's resource specification.
...
...
components/src/dynamo/planner/tests/unit/test_kubernetes_connector.py
View file @
2cd4288a
...
@@ -1040,3 +1040,252 @@ def test_get_actual_worker_counts_no_components(kubernetes_connector, mock_kube_
...
@@ -1040,3 +1040,252 @@ def test_get_actual_worker_counts_no_components(kubernetes_connector, mock_kube_
assert
prefill_count
==
0
assert
prefill_count
==
0
assert
decode_count
==
0
assert
decode_count
==
0
assert
is_stable
is
True
assert
is_stable
is
True
# Tests for _resolve_dgd_service / get_worker_info component-filter.
#
# Regression: the filter that compares an MDC entry's ``component`` field
# against ``expected_component`` must use the lowercase backend-default
# name (what the Rust runtime writes to MDC), NOT the DGD ``spec.services``
# dict key. The DGD key is typically PascalCase (``VllmPrefillWorker``)
# while MDC carries the Endpoint name (``prefill`` / ``backend`` /
# ``tensorrt_llm``); returning the DGD key for the filter would cause
# every real-world MDC entry to be skipped, leaving WorkerInfo without
# ``context_length`` and silently breaking easy-mode load scaling.
def
test_resolve_dgd_service_prefill_uses_backend_default_for_filter
(
kubernetes_connector
,
mock_kube_api
):
"""vLLM prefill: filter name = "prefill" (MDC side), not DGD services key."""
mock_deployment
=
{
"metadata"
:
{
"name"
:
"test-graph"
},
"spec"
:
{
"services"
:
{
"VllmPrefillWorker"
:
{
"replicas"
:
1
,
"subComponentType"
:
"prefill"
,
},
}
},
}
mock_kube_api
.
get_graph_deployment
.
return_value
=
mock_deployment
dgd_service_name
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
PREFILL
,
backend
=
"vllm"
)
# k8s operations (e.g. replica patch) still target the PascalCase DGD key.
assert
dgd_service_name
==
"VllmPrefillWorker"
# The filter side must match what the Rust runtime writes to MDC.
assert
expected_component
==
"prefill"
def
test_resolve_dgd_service_decode_uses_backend_default_for_filter
(
kubernetes_connector
,
mock_kube_api
):
"""vLLM decode: MDC carries "backend", NOT "decode"; filter must match that."""
mock_deployment
=
{
"metadata"
:
{
"name"
:
"test-graph"
},
"spec"
:
{
"services"
:
{
"VllmDecodeWorker"
:
{
"replicas"
:
1
,
"subComponentType"
:
"decode"
,
},
}
},
}
mock_kube_api
.
get_graph_deployment
.
return_value
=
mock_deployment
dgd_service_name
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
DECODE
,
backend
=
"vllm"
)
assert
dgd_service_name
==
"VllmDecodeWorker"
# Critically, vLLM's decode-worker component name is "backend" (from
# VllmComponentName.decode_worker_component_name). Using
# SubComponentType.DECODE.value ("decode") here would break decode
# filtering on every backend.
assert
expected_component
==
"backend"
def
test_resolve_dgd_service_trtllm_decode_uses_tensorrt_llm_name
(
kubernetes_connector
,
mock_kube_api
):
"""TRT-LLM decode: MDC carries "tensorrt_llm"; filter must match."""
mock_deployment
=
{
"metadata"
:
{
"name"
:
"test-graph"
},
"spec"
:
{
"services"
:
{
"TRTLLMDecodeWorker"
:
{
"replicas"
:
1
,
"subComponentType"
:
"decode"
,
},
}
},
}
mock_kube_api
.
get_graph_deployment
.
return_value
=
mock_deployment
_
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
DECODE
,
backend
=
"trtllm"
)
assert
expected_component
==
"tensorrt_llm"
def
test_resolve_dgd_service_missing_dgd_still_returns_backend_default
(
kubernetes_connector
,
mock_kube_api
):
"""When DGD lookup fails, still return the backend default for filtering."""
mock_kube_api
.
get_graph_deployment
.
side_effect
=
DynamoGraphDeploymentNotFoundError
(
"test-graph"
,
"default"
)
dgd_service_name
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
PREFILL
,
backend
=
"vllm"
)
assert
dgd_service_name
is
None
assert
expected_component
==
"prefill"
def
test_resolve_dgd_service_respects_user_endpoint_override
(
kubernetes_connector
,
mock_kube_api
):
"""If the DGD passes --endpoint ns.comp.ep, the MDC filter must use 'comp'."""
mock_deployment
=
{
"metadata"
:
{
"name"
:
"test-graph"
},
"spec"
:
{
"services"
:
{
"VllmPrefillWorker"
:
{
"replicas"
:
1
,
"subComponentType"
:
"prefill"
,
"extraPodSpec"
:
{
"mainContainer"
:
{
"args"
:
[
"--endpoint"
,
"my-ns.my-custom-prefill.generate"
,
"--model"
,
"Qwen/Qwen3-8B"
,
]
}
},
},
}
},
}
mock_kube_api
.
get_graph_deployment
.
return_value
=
mock_deployment
dgd_service_name
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
PREFILL
,
backend
=
"vllm"
)
# k8s operations still target the DGD services key.
assert
dgd_service_name
==
"VllmPrefillWorker"
# Filter must match what the worker will actually write to MDC, which
# comes from the user's --endpoint override, not the backend default.
assert
expected_component
==
"my-custom-prefill"
def
test_resolve_dgd_service_endpoint_override_with_dyn_prefix
(
kubernetes_connector
,
mock_kube_api
):
"""parse_endpoint accepts 'dyn://' prefix; the extracted component must strip it."""
mock_deployment
=
{
"metadata"
:
{
"name"
:
"test-graph"
},
"spec"
:
{
"services"
:
{
"VllmDecodeWorker"
:
{
"replicas"
:
1
,
"subComponentType"
:
"decode"
,
"extraPodSpec"
:
{
"mainContainer"
:
{
"args"
:
[
"--endpoint"
,
"dyn://ns.user-decode.generate"
,
]
}
},
},
}
},
}
mock_kube_api
.
get_graph_deployment
.
return_value
=
mock_deployment
_
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
DECODE
,
backend
=
"vllm"
)
assert
expected_component
==
"user-decode"
def
test_resolve_dgd_service_malformed_endpoint_falls_back_to_default
(
kubernetes_connector
,
mock_kube_api
):
"""Malformed --endpoint (wrong number of parts) falls back to backend default."""
mock_deployment
=
{
"metadata"
:
{
"name"
:
"test-graph"
},
"spec"
:
{
"services"
:
{
"VllmPrefillWorker"
:
{
"replicas"
:
1
,
"subComponentType"
:
"prefill"
,
"extraPodSpec"
:
{
"mainContainer"
:
{
"args"
:
[
"--endpoint"
,
"only-two.parts"
],
}
},
},
}
},
}
mock_kube_api
.
get_graph_deployment
.
return_value
=
mock_deployment
_
,
expected_component
=
kubernetes_connector
.
_resolve_dgd_service
(
SubComponentType
.
PREFILL
,
backend
=
"vllm"
)
assert
expected_component
==
"prefill"
def
test_service_get_component_name_from_endpoint_arg_present
():
service
=
Service
(
name
=
"VllmPrefillWorker"
,
service
=
{
"extraPodSpec"
:
{
"mainContainer"
:
{
"args"
:
[
"--endpoint"
,
"ns.custom-comp.generate"
,
"--other"
,
"flag"
,
]
}
}
},
)
assert
service
.
get_component_name_from_endpoint_arg
()
==
"custom-comp"
def
test_service_get_component_name_from_endpoint_arg_absent
():
service
=
Service
(
name
=
"VllmPrefillWorker"
,
service
=
{
"extraPodSpec"
:
{
"mainContainer"
:
{
"args"
:
[
"--model"
,
"Qwen/Qwen3-8B"
],
}
}
},
)
assert
service
.
get_component_name_from_endpoint_arg
()
is
None
def
test_service_get_component_name_from_endpoint_arg_missing_value
():
"""--endpoint with no following arg should return None, not raise IndexError."""
service
=
Service
(
name
=
"VllmPrefillWorker"
,
service
=
{
"extraPodSpec"
:
{
"mainContainer"
:
{
"args"
:
[
"--endpoint"
]}}},
)
assert
service
.
get_component_name_from_endpoint_arg
()
is
None
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment