Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
37adc0a8
Unverified
Commit
37adc0a8
authored
Aug 28, 2025
by
julienmancuso
Committed by
GitHub
Aug 28, 2025
Browse files
feat: update planner to use DYN_PARENT_DGD_K8S_NAME (#2774)
Signed-off-by:
Julien Mancuso
<
jmancuso@nvidia.com
>
parent
e28ff8d2
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
123 additions
and
115 deletions
+123
-115
components/planner/src/dynamo/planner/kube.py
components/planner/src/dynamo/planner/kube.py
+20
-59
components/planner/src/dynamo/planner/kubernetes_connector.py
...onents/planner/src/dynamo/planner/kubernetes_connector.py
+10
-49
components/planner/test/kube.py
components/planner/test/kube.py
+38
-0
components/planner/test/kubernetes_connector.py
components/planner/test/kubernetes_connector.py
+55
-7
No files found.
components/planner/src/dynamo/planner/kube.py
View file @
37adc0a8
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
# limitations under the License.
# limitations under the License.
import
asyncio
import
asyncio
import
os
from
typing
import
Optional
from
typing
import
Optional
from
kubernetes
import
client
,
config
from
kubernetes
import
client
,
config
...
@@ -53,77 +54,37 @@ class KubernetesAPI:
...
@@ -53,77 +54,37 @@ class KubernetesAPI:
name
=
graph_deployment_name
,
name
=
graph_deployment_name
,
)
)
async
def
get_graph_deployment
(
async
def
get_parent_graph_deployment
(
self
)
->
Optional
[
dict
]:
self
,
component_name
:
str
,
dynamo_namespace
:
str
)
->
Optional
[
dict
]:
"""
"""
Get DynamoGraphDeployment by first finding the associated DynamoComponentDeployment
Get the parent DynamoGraphDeployment using environment variable.
and then retrieving its owner reference.
Args:
Uses DYN_PARENT_DGD_K8S_NAME environment variable and assumes the DGD
component_name: The name of the component
is in the same namespace as this component (self.current_namespace).
dynamo_namespace: The dynamo namespace
Returns:
Returns:
The DynamoGraphDeployment object or None if
not found
The DynamoGraphDeployment object or None if
env var is not set
"""
"""
try
:
dgd_name
=
os
.
getenv
(
"DYN_PARENT_DGD_K8S_NAME"
)
# First, find the DynamoComponentDeployment using the component name and namespace labels
label_selector
=
f
"nvidia.com/dynamo-component=
{
component_name
}
,nvidia.com/dynamo-namespace=
{
dynamo_namespace
}
"
component_deployments
=
self
.
custom_api
.
list_namespaced_custom_object
(
group
=
"nvidia.com"
,
version
=
"v1alpha1"
,
namespace
=
self
.
current_namespace
,
plural
=
"dynamocomponentdeployments"
,
label_selector
=
label_selector
,
)
items
=
component_deployments
.
get
(
"items"
,
[])
if
not
items
:
return
None
if
len
(
items
)
>
1
:
raise
ValueError
(
f
"Multiple component deployments found for component
{
component_name
}
in dynamo namespace
{
dynamo_namespace
}
. "
"Expected exactly one deployment."
)
# Get the component deployment and extract the owner reference
component_deployment
=
items
[
0
]
owner_refs
=
component_deployment
.
get
(
"metadata"
,
{}).
get
(
"ownerReferences"
,
[]
)
# Find the DynamoGraphDeployment in the owner references
graph_deployment_ref
=
None
for
ref
in
owner_refs
:
if
(
ref
.
get
(
"apiVersion"
)
==
"nvidia.com/v1alpha1"
and
ref
.
get
(
"kind"
)
==
"DynamoGraphDeployment"
):
graph_deployment_ref
=
ref
break
if
not
graph_deployment_ref
:
return
None
# Get the actual DynamoGraphDeployment using the name from the owner reference
graph_deployment_name
=
graph_deployment_ref
.
get
(
"name"
)
if
not
graph_deployment_name
:
return
None
graph_deployment
=
self
.
_get_graph_deployment_from_name
(
graph_deployment_name
)
return
graph_deployment
if
not
dgd_name
:
return
None
try
:
return
self
.
_get_graph_deployment_from_name
(
dgd_name
)
except
client
.
ApiException
as
e
:
except
client
.
ApiException
as
e
:
if
e
.
status
==
404
:
if
e
.
status
==
404
:
return
None
return
None
raise
raise
async def get_graph_deployment(self) -> Optional[dict]:
    """Return the parent DynamoGraphDeployment for this component.

    Thin alias kept for callers that still use the older method name; it
    simply awaits ``get_parent_graph_deployment`` and passes the result on.

    Returns:
        Whatever ``get_parent_graph_deployment`` returns: the
        DynamoGraphDeployment object, or None.
    """
    parent_deployment = await self.get_parent_graph_deployment()
    return parent_deployment
async
def
update_graph_replicas
(
async
def
update_graph_replicas
(
self
,
graph_deployment_name
:
str
,
component_name
:
str
,
replicas
:
int
self
,
graph_deployment_name
:
str
,
component_name
:
str
,
replicas
:
int
)
->
None
:
)
->
None
:
...
...
components/planner/src/dynamo/planner/kubernetes_connector.py
View file @
37adc0a8
...
@@ -32,13 +32,9 @@ class KubernetesConnector(PlannerConnector):
...
@@ -32,13 +32,9 @@ class KubernetesConnector(PlannerConnector):
async
def
add_component
(
self
,
component_name
:
str
,
blocking
:
bool
=
True
):
async
def
add_component
(
self
,
component_name
:
str
,
blocking
:
bool
=
True
):
"""Add a component by increasing its replica count by 1"""
"""Add a component by increasing its replica count by 1"""
deployment
=
await
self
.
kube_api
.
get_graph_deployment
(
deployment
=
await
self
.
kube_api
.
get_graph_deployment
()
component_name
,
self
.
dynamo_namespace
)
if
deployment
is
None
:
if
deployment
is
None
:
raise
ValueError
(
raise
ValueError
(
"Parent DynamoGraphDeployment not found"
)
f
"Graph not found for component
{
component_name
}
in dynamo namespace
{
self
.
dynamo_namespace
}
"
)
# get current replicas or 1 if not found
# get current replicas or 1 if not found
current_replicas
=
self
.
_get_current_replicas
(
deployment
,
component_name
)
current_replicas
=
self
.
_get_current_replicas
(
deployment
,
component_name
)
...
@@ -55,13 +51,9 @@ class KubernetesConnector(PlannerConnector):
...
@@ -55,13 +51,9 @@ class KubernetesConnector(PlannerConnector):
async
def
remove_component
(
self
,
component_name
:
str
,
blocking
:
bool
=
True
):
async
def
remove_component
(
self
,
component_name
:
str
,
blocking
:
bool
=
True
):
"""Remove a component by decreasing its replica count by 1"""
"""Remove a component by decreasing its replica count by 1"""
deployment
=
await
self
.
kube_api
.
get_graph_deployment
(
deployment
=
await
self
.
kube_api
.
get_graph_deployment
()
component_name
,
self
.
dynamo_namespace
)
if
deployment
is
None
:
if
deployment
is
None
:
raise
ValueError
(
raise
ValueError
(
"Parent DynamoGraphDeployment not found"
)
f
"Graph
{
component_name
}
not found for namespace
{
self
.
dynamo_namespace
}
"
)
# get current replicas or 1 if not found
# get current replicas or 1 if not found
current_replicas
=
self
.
_get_current_replicas
(
deployment
,
component_name
)
current_replicas
=
self
.
_get_current_replicas
(
deployment
,
component_name
)
...
@@ -76,48 +68,17 @@ class KubernetesConnector(PlannerConnector):
...
@@ -76,48 +68,17 @@ class KubernetesConnector(PlannerConnector):
self
.
_get_graph_deployment_name
(
deployment
)
self
.
_get_graph_deployment_name
(
deployment
)
)
)
async
def
_validate_components_same_deployment
(
async
def
set_component_replicas
(
self
,
target_replicas
:
dict
[
str
,
int
]
self
,
target_replicas
:
dict
[
str
,
int
],
blocking
:
bool
=
True
)
->
dict
:
):
"""
"""Set the replicas for multiple components at once"""
Validate that all target components belong to the same DynamoGraphDeployment.
"""
if
not
target_replicas
:
if
not
target_replicas
:
raise
ValueError
(
"target_replicas cannot be empty"
)
raise
ValueError
(
"target_replicas cannot be empty"
)
# Get deployment for first component
deployment
=
await
self
.
kube_api
.
get_graph_deployment
()
first_component
=
next
(
iter
(
target_replicas
))
deployment
=
await
self
.
kube_api
.
get_graph_deployment
(
first_component
,
self
.
dynamo_namespace
)
if
deployment
is
None
:
if
deployment
is
None
:
raise
ValueError
(
raise
ValueError
(
"Parent DynamoGraphDeployment not found"
)
f
"Component
{
first_component
}
not found in namespace
{
self
.
dynamo_namespace
}
"
)
# Validate that all components belong to the same DGD
graph_name
=
deployment
[
"metadata"
][
"name"
]
for
component
in
target_replicas
:
comp_deployment
=
await
self
.
kube_api
.
get_graph_deployment
(
component
,
self
.
dynamo_namespace
)
if
comp_deployment
is
None
:
raise
ValueError
(
f
"Component
{
component
}
not found in namespace
{
self
.
dynamo_namespace
}
"
)
if
comp_deployment
[
"metadata"
][
"name"
]
!=
graph_name
:
raise
ValueError
(
f
"Component
{
component
}
belongs to graph '
{
comp_deployment
[
'metadata'
][
'name'
]
}
' "
f
"but expected graph '
{
graph_name
}
'. All components must belong to the same GraphDeployment."
)
return
deployment
async
def
set_component_replicas
(
self
,
target_replicas
:
dict
[
str
,
int
],
blocking
:
bool
=
True
):
"""Set the replicas for multiple components at once"""
deployment
=
await
self
.
_validate_components_same_deployment
(
target_replicas
)
if
not
await
self
.
kube_api
.
is_deployment_ready
(
if
not
await
self
.
kube_api
.
is_deployment_ready
(
self
.
_get_graph_deployment_name
(
deployment
)
self
.
_get_graph_deployment_name
(
deployment
)
):
):
...
...
components/planner/test/kube.py
View file @
37adc0a8
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
os
from
typing
import
Any
,
Dict
from
typing
import
Any
,
Dict
from
unittest.mock
import
MagicMock
,
patch
from
unittest.mock
import
MagicMock
,
patch
...
@@ -245,3 +246,40 @@ async def test_wait_for_graph_deployment_ready_on_second_attempt(
...
@@ -245,3 +246,40 @@ async def test_wait_for_graph_deployment_ready_on_second_attempt(
await
k8s_api
.
wait_for_graph_deployment_ready
(
await
k8s_api
.
wait_for_graph_deployment_ready
(
"test-deployment"
,
max_attempts
=
2
,
delay_seconds
=
0.1
"test-deployment"
,
max_attempts
=
2
,
delay_seconds
=
0.1
)
)
@pytest.mark.asyncio
async def test_get_parent_graph_deployment_with_env_var(k8s_api, mock_custom_api):
    """With DYN_PARENT_DGD_K8S_NAME set, the named deployment is looked up and returned."""
    expected = {"metadata": {"name": "parent-dgd"}}
    env_override = patch.dict(os.environ, {"DYN_PARENT_DGD_K8S_NAME": "parent-dgd"})
    lookup_override = patch.object(
        k8s_api, "_get_graph_deployment_from_name", return_value=expected
    )

    with env_override, lookup_override as lookup:
        found = await k8s_api.get_parent_graph_deployment()

    # The mock keeps its call record after the patch is undone.
    assert found == expected
    lookup.assert_called_once_with("parent-dgd")
@pytest.mark.asyncio
async def test_get_parent_graph_deployment_without_env_var(k8s_api, mock_custom_api):
    """With an emptied environment, get_parent_graph_deployment yields None."""
    # clear=True wipes os.environ for the duration of the block, so
    # DYN_PARENT_DGD_K8S_NAME is guaranteed to be absent.
    emptied_env = patch.dict(os.environ, {}, clear=True)
    with emptied_env:
        found = await k8s_api.get_parent_graph_deployment()

    assert found is None
@pytest.mark.asyncio
async def test_get_graph_deployment_delegates_to_parent(k8s_api, mock_custom_api):
    """get_graph_deployment returns what get_parent_graph_deployment returns, calling it once."""
    expected = {"metadata": {"name": "parent-dgd"}}
    parent_override = patch.object(
        k8s_api, "get_parent_graph_deployment", return_value=expected
    )

    with parent_override as parent_call:
        found = await k8s_api.get_graph_deployment()

    assert found == expected
    parent_call.assert_called_once()
components/planner/test/kubernetes_connector.py
View file @
37adc0a8
...
@@ -26,6 +26,7 @@ def mock_kube_api():
...
@@ -26,6 +26,7 @@ def mock_kube_api():
mock_api
.
get_graph_deployment
=
AsyncMock
()
mock_api
.
get_graph_deployment
=
AsyncMock
()
mock_api
.
update_graph_replicas
=
AsyncMock
()
mock_api
.
update_graph_replicas
=
AsyncMock
()
mock_api
.
wait_for_graph_deployment_ready
=
AsyncMock
()
mock_api
.
wait_for_graph_deployment_ready
=
AsyncMock
()
mock_api
.
is_deployment_ready
=
AsyncMock
()
return
mock_api
return
mock_api
...
@@ -42,7 +43,7 @@ def kubernetes_connector(mock_kube_api_class, monkeypatch):
...
@@ -42,7 +43,7 @@ def kubernetes_connector(mock_kube_api_class, monkeypatch):
monkeypatch
.
setattr
(
monkeypatch
.
setattr
(
"dynamo.planner.kubernetes_connector.KubernetesAPI"
,
mock_kube_api_class
"dynamo.planner.kubernetes_connector.KubernetesAPI"
,
mock_kube_api_class
)
)
connector
=
KubernetesConnector
(
"default"
)
connector
=
KubernetesConnector
(
"test-dynamo-namespace"
,
"default"
)
return
connector
return
connector
...
@@ -62,9 +63,7 @@ async def test_add_component_increases_replicas(kubernetes_connector, mock_kube_
...
@@ -62,9 +63,7 @@ async def test_add_component_increases_replicas(kubernetes_connector, mock_kube_
await
kubernetes_connector
.
add_component
(
component_name
)
await
kubernetes_connector
.
add_component
(
component_name
)
# Assert
# Assert
mock_kube_api
.
get_graph_deployment
.
assert_called_once_with
(
mock_kube_api
.
get_graph_deployment
.
assert_called_once
()
component_name
,
kubernetes_connector
.
dynamo_namespace
)
mock_kube_api
.
update_graph_replicas
.
assert_called_once_with
(
mock_kube_api
.
update_graph_replicas
.
assert_called_once_with
(
"test-graph"
,
component_name
,
2
"test-graph"
,
component_name
,
2
)
)
...
@@ -100,9 +99,7 @@ async def test_add_component_deployment_not_found(kubernetes_connector, mock_kub
...
@@ -100,9 +99,7 @@ async def test_add_component_deployment_not_found(kubernetes_connector, mock_kub
mock_kube_api
.
get_graph_deployment
.
return_value
=
None
mock_kube_api
.
get_graph_deployment
.
return_value
=
None
# Act & Assert
# Act & Assert
with
pytest
.
raises
(
with
pytest
.
raises
(
ValueError
,
match
=
"Parent DynamoGraphDeployment not found"
):
ValueError
,
match
=
f
"Graph not found for component
{
component_name
}
"
):
await
kubernetes_connector
.
add_component
(
component_name
)
await
kubernetes_connector
.
add_component
(
component_name
)
...
@@ -142,3 +139,54 @@ async def test_remove_component_with_zero_replicas(kubernetes_connector, mock_ku
...
@@ -142,3 +139,54 @@ async def test_remove_component_with_zero_replicas(kubernetes_connector, mock_ku
# Assert
# Assert
mock_kube_api
.
update_graph_replicas
.
assert_not_called
()
mock_kube_api
.
update_graph_replicas
.
assert_not_called
()
mock_kube_api
.
wait_for_graph_deployment_ready
.
assert_not_called
()
mock_kube_api
.
wait_for_graph_deployment_ready
.
assert_not_called
()
@pytest.mark.asyncio
async def test_set_component_replicas(kubernetes_connector, mock_kube_api):
    """Setting replicas for two components fetches the graph once and updates each component."""
    # Arrange: a ready graph named "test-graph" with both services at one replica.
    graph = {
        "metadata": {"name": "test-graph"},
        "spec": {
            "services": {
                service: {"replicas": 1} for service in ("component1", "component2")
            }
        },
    }
    desired = {"component1": 3, "component2": 2}
    mock_kube_api.get_graph_deployment.return_value = graph
    mock_kube_api.is_deployment_ready.return_value = True
    mock_kube_api.update_graph_replicas.return_value = None
    mock_kube_api.wait_for_graph_deployment_ready.return_value = None

    # Act
    await kubernetes_connector.set_component_replicas(desired)

    # Assert
    mock_kube_api.get_graph_deployment.assert_called_once()
    mock_kube_api.is_deployment_ready.assert_called_once_with("test-graph")
    # One update call is expected per entry in the requested mapping.
    assert mock_kube_api.update_graph_replicas.call_count == 2
    mock_kube_api.wait_for_graph_deployment_ready.assert_called_once_with("test-graph")
@pytest.mark.asyncio
async def test_set_component_replicas_deployment_not_found(
    kubernetes_connector, mock_kube_api
):
    """A missing parent deployment (lookup returns None) raises ValueError."""
    # Arrange
    mock_kube_api.get_graph_deployment.return_value = None
    desired = {"component1": 3}

    # Act & Assert
    expect_not_found = pytest.raises(
        ValueError, match="Parent DynamoGraphDeployment not found"
    )
    with expect_not_found:
        await kubernetes_connector.set_component_replicas(desired)
@pytest.mark.asyncio
async def test_set_component_replicas_empty_target_replicas(
    kubernetes_connector, mock_kube_api
):
    """An empty replica mapping is rejected with ValueError."""
    # Arrange
    nothing_requested: dict[str, int] = {}

    # Act & Assert
    expect_rejection = pytest.raises(
        ValueError, match="target_replicas cannot be empty"
    )
    with expect_rejection:
        await kubernetes_connector.set_component_replicas(nothing_requested)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment