Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
c939da0c
Unverified
Commit
c939da0c
authored
May 30, 2025
by
mohammedabdulwahhab
Committed by
GitHub
May 30, 2025
Browse files
fix: wait until probing on vllm examples to prevent timeouts (#1293)
parent
98a5fab1
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
29 additions
and
34 deletions
+29
-34
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
...ternal/controller/dynamocomponentdeployment_controller.go
+24
-0
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+0
-29
deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py
deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py
+3
-3
examples/llm/configs/disagg.yaml
examples/llm/configs/disagg.yaml
+2
-2
No files found.
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller.go
View file @
c939da0c
...
@@ -595,6 +595,12 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderPodTemplateSpec(ctx
...
@@ -595,6 +595,12 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderPodTemplateSpec(ctx
return
nil
,
fmt
.
Errorf
(
"generateLeaderPodTemplateSpec: GPU limit is not set for Ray leader pod"
)
return
nil
,
fmt
.
Errorf
(
"generateLeaderPodTemplateSpec: GPU limit is not set for Ray leader pod"
)
}
}
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
LivenessProbe
=
nil
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
ReadinessProbe
=
nil
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
Args
[
0
]
=
fmt
.
Sprintf
(
"ray start --head --port=6379 && %s"
,
currentArgs
)
leaderPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
Args
[
0
]
=
fmt
.
Sprintf
(
"ray start --head --port=6379 && %s"
,
currentArgs
)
return
leaderPodTemplateSpec
,
nil
return
leaderPodTemplateSpec
,
nil
...
@@ -634,6 +640,12 @@ func (r *DynamoComponentDeploymentReconciler) generateWorkerPodTemplateSpec(ctx
...
@@ -634,6 +640,12 @@ func (r *DynamoComponentDeploymentReconciler) generateWorkerPodTemplateSpec(ctx
return
nil
,
fmt
.
Errorf
(
"generateWorkerPodTemplateSpec: GPU limit is not set for Ray worker pod"
)
return
nil
,
fmt
.
Errorf
(
"generateWorkerPodTemplateSpec: GPU limit is not set for Ray worker pod"
)
}
}
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
LivenessProbe
=
nil
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
ReadinessProbe
=
nil
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
Args
[
0
]
=
"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
workerPodTemplateSpec
.
Spec
.
Containers
[
0
]
.
Args
[
0
]
=
"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
return
workerPodTemplateSpec
,
nil
return
workerPodTemplateSpec
,
nil
...
@@ -1569,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1569,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
// Set default probes if none are provided
// Set default probes if none are provided
if
livenessProbe
==
nil
{
if
livenessProbe
==
nil
{
container
.
LivenessProbe
=
&
corev1
.
Probe
{
container
.
LivenessProbe
=
&
corev1
.
Probe
{
// TODO: Initial delay and other probe settings should be read off sdk, these are default settings that should cover vllm / hello-world
InitialDelaySeconds
:
60
,
// 1 minute
PeriodSeconds
:
60
,
// Check every 1 minute
TimeoutSeconds
:
5
,
// 5 second timeout
FailureThreshold
:
10
,
// Allow 10 failures before declaring unhealthy
SuccessThreshold
:
1
,
// Need 1 success to be considered healthy
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/healthz"
,
Path
:
"/healthz"
,
...
@@ -1580,6 +1598,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
...
@@ -1580,6 +1598,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if
readinessProbe
==
nil
{
if
readinessProbe
==
nil
{
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
// TODO: Initial delay and other probe settings should be read off sdk, these are default settings that should cover vllm / hello-world
InitialDelaySeconds
:
60
,
// 1 minute
PeriodSeconds
:
60
,
// Check every 1 minute
TimeoutSeconds
:
5
,
// 5 second timeout
FailureThreshold
:
10
,
// Allow 10 failures before declaring not ready
SuccessThreshold
:
1
,
// Need 1 success to be considered ready
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/readyz"
,
Path
:
"/readyz"
,
...
...
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
c939da0c
...
@@ -38,7 +38,6 @@ import (
...
@@ -38,7 +38,6 @@ import (
networkingv1
"k8s.io/api/networking/v1"
networkingv1
"k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/api/resource"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
metav1
"k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
"k8s.io/utils/ptr"
...
@@ -962,20 +961,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -962,20 +961,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
),
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
),
},
},
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/healthz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/readyz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
},
},
},
},
Volumes
:
[]
corev1
.
Volume
{{
Name
:
"shared-memory"
,
VolumeSource
:
corev1
.
VolumeSource
{
EmptyDir
:
&
corev1
.
EmptyDirVolumeSource
{
Medium
:
corev1
.
StorageMediumMemory
,
SizeLimit
:
limit
}}}},
Volumes
:
[]
corev1
.
Volume
{{
Name
:
"shared-memory"
,
VolumeSource
:
corev1
.
VolumeSource
{
EmptyDir
:
&
corev1
.
EmptyDirVolumeSource
{
Medium
:
corev1
.
StorageMediumMemory
,
SizeLimit
:
limit
}}}},
...
@@ -1014,20 +999,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -1014,20 +999,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
)},
Requests
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"300m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"500Mi"
)},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"500m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
)},
Limits
:
corev1
.
ResourceList
{
corev1
.
ResourceCPU
:
resource
.
MustParse
(
"500m"
),
corev1
.
ResourceMemory
:
resource
.
MustParse
(
"1Gi"
),
"nvidia.com/gpu"
:
resource
.
MustParse
(
"1"
)},
},
},
LivenessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/healthz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/readyz"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoHealthPortName
),
},
},
},
},
},
},
},
Volumes
:
[]
corev1
.
Volume
{{
Name
:
"shared-memory"
,
VolumeSource
:
corev1
.
VolumeSource
{
EmptyDir
:
&
corev1
.
EmptyDirVolumeSource
{
Medium
:
corev1
.
StorageMediumMemory
,
SizeLimit
:
limit
}}}},
Volumes
:
[]
corev1
.
Volume
{{
Name
:
"shared-memory"
,
VolumeSource
:
corev1
.
VolumeSource
{
EmptyDir
:
&
corev1
.
EmptyDirVolumeSource
{
Medium
:
corev1
.
StorageMediumMemory
,
SizeLimit
:
limit
}}}},
...
...
deploy/sdk/src/dynamo/sdk/cli/serve_dynamo.py
View file @
c939da0c
...
@@ -87,7 +87,7 @@ def add_fastapi_routes(app, service, class_instance):
...
@@ -87,7 +87,7 @@ def add_fastapi_routes(app, service, class_instance):
return
added_routes
return
added_routes
app
=
typer
.
Typer
()
app
=
typer
.
Typer
(
pretty_exceptions_enable
=
False
)
@
app
.
command
()
@
app
.
command
()
...
@@ -207,6 +207,8 @@ def main(
...
@@ -207,6 +207,8 @@ def main(
dynamo_context
[
"component"
]
=
component
dynamo_context
[
"component"
]
=
component
dynamo_context
[
"endpoints"
]
=
endpoints
dynamo_context
[
"endpoints"
]
=
endpoints
class_instance
=
service
.
inner
()
class_instance
=
service
.
inner
()
# signal that class_instance (and its setup) is done
instanceReady
.
set
()
dynamo_handlers
=
[]
dynamo_handlers
=
[]
for
name
,
endpoint
in
dynamo_endpoints
.
items
():
for
name
,
endpoint
in
dynamo_endpoints
.
items
():
if
DynamoTransport
.
DEFAULT
in
endpoint
.
transports
:
if
DynamoTransport
.
DEFAULT
in
endpoint
.
transports
:
...
@@ -234,8 +236,6 @@ def main(
...
@@ -234,8 +236,6 @@ def main(
logger
.
info
(
logger
.
info
(
f
"Starting
{
service
.
name
}
instance with all registered endpoints"
f
"Starting
{
service
.
name
}
instance with all registered endpoints"
)
)
# signal that class_instance (and its setup) is done
instanceReady
.
set
()
# Launch serve_endpoint for all endpoints concurrently
# Launch serve_endpoint for all endpoints concurrently
tasks
=
[
tasks
=
[
endpoint
.
serve_endpoint
(
handler
)
endpoint
.
serve_endpoint
(
handler
)
...
...
examples/llm/configs/disagg.yaml
View file @
c939da0c
...
@@ -35,7 +35,7 @@ VllmWorker:
...
@@ -35,7 +35,7 @@ VllmWorker:
ServiceArgs
:
ServiceArgs
:
workers
:
1
workers
:
1
resources
:
resources
:
gpu
:
1
gpu
:
'
1'
common-configs
:
[
model
,
block-size
,
max-model-len
,
kv-transfer-config
]
common-configs
:
[
model
,
block-size
,
max-model-len
,
kv-transfer-config
]
PrefillWorker
:
PrefillWorker
:
...
@@ -43,7 +43,7 @@ PrefillWorker:
...
@@ -43,7 +43,7 @@ PrefillWorker:
ServiceArgs
:
ServiceArgs
:
workers
:
1
workers
:
1
resources
:
resources
:
gpu
:
1
gpu
:
'
1'
common-configs
:
[
model
,
block-size
,
max-model-len
,
kv-transfer-config
]
common-configs
:
[
model
,
block-size
,
max-model-len
,
kv-transfer-config
]
Planner
:
Planner
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment