Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c939da0c
"platforms/reference/vscode:/vscode.git/clone" did not exist on "8c693ef797c77f482d0d65a6ae01cb6981c3d825"
Unverified
Commit
c939da0c
authored
May 30, 2025
by
mohammedabdulwahhab
Committed by
GitHub
May 30, 2025
Browse files
fix: wait until probing on vllm examples to prevent timeouts (#1293)
parent
98a5fab1
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
34 deletions
+29
-34
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
...ternal/controller/dynamocomponentdeployment_controller.go
+24
-0
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+0
-29
deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py
deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py
+3
-3
examples/llm/configs/disagg.yaml
examples/llm/configs/disagg.yaml
+2
-2
No files found.
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
View file @
c939da0c
...
...
@@ -595,6 +595,12 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderPodTemplateSpec(ctx
return
nil
,
fmt
.
Errorf
(
"generateLeaderPodTemplateSpec: GPU limit is not set for Ray leader pod"
)
}
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
LivenessProbe
=
nil
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
ReadinessProbe
=
nil
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
Args
[
0
]
=
fmt
.
Sprintf
(
"ray start --head --port=6379 && %s"
,
currentArgs
)
return
leaderPodTemplateSpec
,
nil
...
...
@@ -634,6 +640,12 @@ func (r *DynamoComponentDeploymentReconciler) generateWorkerPodTemplateSpec(ctx
return
nil
,
fmt
.
Errorf
(
"generateWorkerPodTemplateSpec: GPU limit is not set for Ray worker pod"
)
}
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
LivenessProbe
=
nil
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
ReadinessProbe
=
nil
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
Args
[
0
]
=
"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
return
workerPodTemplateSpec
,
nil
...
...
@@ -1569,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
// Set default probes if none are provided
if
livenessProbe
==
nil
{
container
.
LivenessProbe
=
&
corev1
.
Probe
{
// TODO: Initial delay and other probe settings should be read off sdk, these are default settings that should cover vllm / hello-world
InitialDelaySeconds
:
60
,
// 1 minute
PeriodSeconds
:
60
,
// Check every 1 minute
TimeoutSeconds
:
5
,
// 5 second timeout
FailureThreshold
:
10
,
// Allow 10 failures before declaring unhealthy
SuccessThreshold
:
1
,
// Need 1 success to be considered healthy
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/healthz"
,
...
...
@@ -1580,6 +1598,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if
readinessProbe
==
nil
{
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
// TODO: Initial delay and other probe settings should be read off sdk, these are default settings that should cover vllm / hello-world
InitialDelaySeconds
:
60
,
// 1 minute
PeriodSeconds
:
60
,
// Check every 1 minute
TimeoutSeconds
:
5
,
// 5 second timeout
FailureThreshold
:
10
,
// Allow 10 failures before declaring not ready
SuccessThreshold
:
1
,
// Need 1 success to be considered ready
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/readyz"
,
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
c939da0c
...
...
@@ -38,7 +38,6 @@ import (
networkingv1
"k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
...
...
@@ -962,20 +961,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
),
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/healthz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/readyz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
},
},
Volumes
:
[]
corev1
.
Volume
{{
Name
:
"shared-memory"
,
VolumeSource
:
corev1
.
VolumeSource
{
EmptyDir
:
&
corev1
.
EmptyDirVolumeSource
{
Medium
:
corev1
.
StorageMediumMemory
,
SizeLimit
:
limit
}}}},
...
...
@@ -1014,20 +999,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
)},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"500m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
)},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/healthz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/readyz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
},
},
Volumes
:
[]
corev1
.
Volume
{{
Name
:
"shared-memory"
,
VolumeSource
:
corev1
.
VolumeSource
{
EmptyDir
:
&
corev1
.
EmptyDirVolumeSource
{
Medium
:
corev1
.
StorageMediumMemory
,
SizeLimit
:
limit
}}}},
...
...
deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py
View file @
c939da0c
...
...
@@ -87,7 +87,7 @@ def add_fastapi_routes(app, service, class_instance):
return
added_routes
app
=
typer
.
Typer
()
app
=
typer
.
Typer
(
pretty_exceptions_enable
=
False
)
@
app
.
command
()
...
...
@@ -207,6 +207,8 @@ def main(
dynamo_context
[
"component"
]
=
component
dynamo_context
[
"endpoints"
]
=
endpoints
class_instance
=
service
.
inner
()
# signal that class_instance (and its setup) is done
instanceReady
.
set
()
dynamo_handlers
=
[]
for
name
,
endpoint
in
dynamo_endpoints
.
items
():
if
DynamoTransport
.
DEFAULT
in
endpoint
.
transports
:
...
...
@@ -234,8 +236,6 @@ def main(
logger
.
info
(
f
"Starting
{
service
.
name
}
instance with all registered endpoints"
)
# signal that class_instance (and its setup) is done
instanceReady
.
set
()
# Launch serve_endpoint for all endpoints concurrently
tasks
=
[
endpoint
.
serve_endpoint
(
handler
)
...
...
examples/llm/configs/disagg.yaml
View file @
c939da0c
...
...
@@ -35,7 +35,7 @@ VllmWorker:
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
gpu
:
'
1'
common-configs
:
[
model
,
block-size
,
max-model-len
,
kv-transfer-config
]
PrefillWorker
:
...
...
@@ -43,7 +43,7 @@ PrefillWorker:
ServiceArgs
:
workers
:
1
resources
:
gpu
:
1
gpu
:
'
1'
common-configs
:
[
model
,
block-size
,
max-model-len
,
kv-transfer-config
]
Planner
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment