Unverified commit c939da0c, authored by mohammedabdulwahhab and committed by GitHub
Browse files

fix: wait until probing on vllm examples to prevent timeouts (#1293)

parent 98a5fab1
...@@ -595,6 +595,12 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderPodTemplateSpec(ctx ...@@ -595,6 +595,12 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderPodTemplateSpec(ctx
return nil, fmt.Errorf("generateLeaderPodTemplateSpec: GPU limit is not set for Ray leader pod") return nil, fmt.Errorf("generateLeaderPodTemplateSpec: GPU limit is not set for Ray leader pod")
} }
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
leaderPodTemplateSpec.Spec.Containers[0].LivenessProbe = nil
leaderPodTemplateSpec.Spec.Containers[0].ReadinessProbe = nil
leaderPodTemplateSpec.Spec.Containers[0].Args[0] = fmt.Sprintf("ray start --head --port=6379 && %s", currentArgs) leaderPodTemplateSpec.Spec.Containers[0].Args[0] = fmt.Sprintf("ray start --head --port=6379 && %s", currentArgs)
return leaderPodTemplateSpec, nil return leaderPodTemplateSpec, nil
...@@ -634,6 +640,12 @@ func (r *DynamoComponentDeploymentReconciler) generateWorkerPodTemplateSpec(ctx ...@@ -634,6 +640,12 @@ func (r *DynamoComponentDeploymentReconciler) generateWorkerPodTemplateSpec(ctx
return nil, fmt.Errorf("generateWorkerPodTemplateSpec: GPU limit is not set for Ray worker pod") return nil, fmt.Errorf("generateWorkerPodTemplateSpec: GPU limit is not set for Ray worker pod")
} }
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
workerPodTemplateSpec.Spec.Containers[0].LivenessProbe = nil
workerPodTemplateSpec.Spec.Containers[0].ReadinessProbe = nil
workerPodTemplateSpec.Spec.Containers[0].Args[0] = "ray start --address=$(LWS_LEADER_ADDRESS):6379 --block" workerPodTemplateSpec.Spec.Containers[0].Args[0] = "ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
return workerPodTemplateSpec, nil return workerPodTemplateSpec, nil
...@@ -1569,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex ...@@ -1569,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
// Set default probes if none are provided // Set default probes if none are provided
if livenessProbe == nil { if livenessProbe == nil {
container.LivenessProbe = &corev1.Probe{ container.LivenessProbe = &corev1.Probe{
// TODO: Initial delay and other probe settings should be read from the SDK; these are default settings that should cover vllm / hello-world
InitialDelaySeconds: 60, // 1 minute
PeriodSeconds: 60, // Check every 1 minute
TimeoutSeconds: 5, // 5 second timeout
FailureThreshold: 10, // Allow 10 failures before declaring unhealthy
SuccessThreshold: 1, // Need 1 success to be considered healthy
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{ HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz", Path: "/healthz",
...@@ -1580,6 +1598,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex ...@@ -1580,6 +1598,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if readinessProbe == nil { if readinessProbe == nil {
container.ReadinessProbe = &corev1.Probe{ container.ReadinessProbe = &corev1.Probe{
// TODO: Initial delay and other probe settings should be read from the SDK; these are default settings that should cover vllm / hello-world
InitialDelaySeconds: 60, // 1 minute
PeriodSeconds: 60, // Check every 1 minute
TimeoutSeconds: 5, // 5 second timeout
FailureThreshold: 10, // Allow 10 failures before declaring not ready
SuccessThreshold: 1, // Need 1 success to be considered ready
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{ HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz", Path: "/readyz",
......
...@@ -38,7 +38,6 @@ import ( ...@@ -38,7 +38,6 @@ import (
networkingv1 "k8s.io/api/networking/v1" networkingv1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record" "k8s.io/client-go/tools/record"
"k8s.io/utils/ptr" "k8s.io/utils/ptr"
...@@ -962,20 +961,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -962,20 +961,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"nvidia.com/gpu": resource.MustParse("1"), "nvidia.com/gpu": resource.MustParse("1"),
}, },
}, },
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
}, },
}, },
Volumes: []corev1.Volume{{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{Medium: corev1.StorageMediumMemory, SizeLimit: limit}}}}, Volumes: []corev1.Volume{{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{Medium: corev1.StorageMediumMemory, SizeLimit: limit}}}},
...@@ -1014,20 +999,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -1014,20 +999,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("300m"), corev1.ResourceMemory: resource.MustParse("500Mi")}, Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("300m"), corev1.ResourceMemory: resource.MustParse("500Mi")},
Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("500m"), corev1.ResourceMemory: resource.MustParse("1Gi"), "nvidia.com/gpu": resource.MustParse("1")}, Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("500m"), corev1.ResourceMemory: resource.MustParse("1Gi"), "nvidia.com/gpu": resource.MustParse("1")},
}, },
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
}, },
}, },
Volumes: []corev1.Volume{{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{Medium: corev1.StorageMediumMemory, SizeLimit: limit}}}}, Volumes: []corev1.Volume{{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{Medium: corev1.StorageMediumMemory, SizeLimit: limit}}}},
......
...@@ -87,7 +87,7 @@ def add_fastapi_routes(app, service, class_instance): ...@@ -87,7 +87,7 @@ def add_fastapi_routes(app, service, class_instance):
return added_routes return added_routes
app = typer.Typer() app = typer.Typer(pretty_exceptions_enable=False)
@app.command() @app.command()
...@@ -207,6 +207,8 @@ def main( ...@@ -207,6 +207,8 @@ def main(
dynamo_context["component"] = component dynamo_context["component"] = component
dynamo_context["endpoints"] = endpoints dynamo_context["endpoints"] = endpoints
class_instance = service.inner() class_instance = service.inner()
# signal that class_instance (and its setup) is done
instanceReady.set()
dynamo_handlers = [] dynamo_handlers = []
for name, endpoint in dynamo_endpoints.items(): for name, endpoint in dynamo_endpoints.items():
if DynamoTransport.DEFAULT in endpoint.transports: if DynamoTransport.DEFAULT in endpoint.transports:
...@@ -234,8 +236,6 @@ def main( ...@@ -234,8 +236,6 @@ def main(
logger.info( logger.info(
f"Starting {service.name} instance with all registered endpoints" f"Starting {service.name} instance with all registered endpoints"
) )
# signal that class_instance (and its setup) is done
instanceReady.set()
# Launch serve_endpoint for all endpoints concurrently # Launch serve_endpoint for all endpoints concurrently
tasks = [ tasks = [
endpoint.serve_endpoint(handler) endpoint.serve_endpoint(handler)
......
...@@ -35,7 +35,7 @@ VllmWorker: ...@@ -35,7 +35,7 @@ VllmWorker:
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: '1'
common-configs: [model, block-size, max-model-len, kv-transfer-config] common-configs: [model, block-size, max-model-len, kv-transfer-config]
PrefillWorker: PrefillWorker:
...@@ -43,7 +43,7 @@ PrefillWorker: ...@@ -43,7 +43,7 @@ PrefillWorker:
ServiceArgs: ServiceArgs:
workers: 1 workers: 1
resources: resources:
gpu: 1 gpu: '1'
common-configs: [model, block-size, max-model-len, kv-transfer-config] common-configs: [model, block-size, max-model-len, kv-transfer-config]
Planner: Planner:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment