Unverified commit c939da0c, authored by mohammedabdulwahhab and committed by GitHub
Browse files

fix: wait until probing on vllm examples to prevent timeouts (#1293)

parent 98a5fab1
......@@ -595,6 +595,12 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderPodTemplateSpec(ctx
return nil, fmt.Errorf("generateLeaderPodTemplateSpec: GPU limit is not set for Ray leader pod")
}
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
leaderPodTemplateSpec.Spec.Containers[0].LivenessProbe = nil
leaderPodTemplateSpec.Spec.Containers[0].ReadinessProbe = nil
leaderPodTemplateSpec.Spec.Containers[0].Args[0] = fmt.Sprintf("ray start --head --port=6379 && %s", currentArgs)
return leaderPodTemplateSpec, nil
......@@ -634,6 +640,12 @@ func (r *DynamoComponentDeploymentReconciler) generateWorkerPodTemplateSpec(ctx
return nil, fmt.Errorf("generateWorkerPodTemplateSpec: GPU limit is not set for Ray worker pod")
}
// TODO: Liveness and readiness probes are temporarily disabled for leader worker sets
// until we implement proper probe configuration that can differentiate between
// leader and worker pods.
workerPodTemplateSpec.Spec.Containers[0].LivenessProbe = nil
workerPodTemplateSpec.Spec.Containers[0].ReadinessProbe = nil
workerPodTemplateSpec.Spec.Containers[0].Args[0] = "ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"
return workerPodTemplateSpec, nil
......@@ -1569,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
// Set default probes if none are provided
if livenessProbe == nil {
container.LivenessProbe = &corev1.Probe{
// TODO: Initial delay and other probe settings should be read off sdk, these are default settings that should cover vllm / hello-world
InitialDelaySeconds: 60, // 1 minute
PeriodSeconds: 60, // Check every 1 minute
TimeoutSeconds: 5, // 5 second timeout
FailureThreshold: 10, // Allow 10 failures before declaring unhealthy
SuccessThreshold: 1, // Need 1 success to be considered healthy
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz",
......@@ -1580,6 +1598,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
if readinessProbe == nil {
container.ReadinessProbe = &corev1.Probe{
// TODO: Initial delay and other probe settings should be read off sdk, these are default settings that should cover vllm / hello-world
InitialDelaySeconds: 60, // 1 minute
PeriodSeconds: 60, // Check every 1 minute
TimeoutSeconds: 5, // 5 second timeout
FailureThreshold: 10, // Allow 10 failures before declaring not ready
SuccessThreshold: 1, // Need 1 success to be considered ready
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz",
......
......@@ -38,7 +38,6 @@ import (
networkingv1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
......@@ -962,20 +961,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
"nvidia.com/gpu": resource.MustParse("1"),
},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
},
},
Volumes: []corev1.Volume{{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{Medium: corev1.StorageMediumMemory, SizeLimit: limit}}}},
......@@ -1014,20 +999,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("300m"), corev1.ResourceMemory: resource.MustParse("500Mi")},
Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("500m"), corev1.ResourceMemory: resource.MustParse("1Gi"), "nvidia.com/gpu": resource.MustParse("1")},
},
LivenessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/healthz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz", Port: intstr.FromString(commonconsts.DynamoHealthPortName),
},
},
},
},
},
Volumes: []corev1.Volume{{Name: "shared-memory", VolumeSource: corev1.VolumeSource{EmptyDir: &corev1.EmptyDirVolumeSource{Medium: corev1.StorageMediumMemory, SizeLimit: limit}}}},
......
......@@ -87,7 +87,7 @@ def add_fastapi_routes(app, service, class_instance):
return added_routes
app = typer.Typer()
app = typer.Typer(pretty_exceptions_enable=False)
@app.command()
......@@ -207,6 +207,8 @@ def main(
dynamo_context["component"] = component
dynamo_context["endpoints"] = endpoints
class_instance = service.inner()
# signal that class_instance (and its setup) is done
instanceReady.set()
dynamo_handlers = []
for name, endpoint in dynamo_endpoints.items():
if DynamoTransport.DEFAULT in endpoint.transports:
......@@ -234,8 +236,6 @@ def main(
logger.info(
f"Starting {service.name} instance with all registered endpoints"
)
# signal that class_instance (and its setup) is done
instanceReady.set()
# Launch serve_endpoint for all endpoints concurrently
tasks = [
endpoint.serve_endpoint(handler)
......
......@@ -35,7 +35,7 @@ VllmWorker:
ServiceArgs:
workers: 1
resources:
gpu: 1
gpu: '1'
common-configs: [model, block-size, max-model-len, kv-transfer-config]
PrefillWorker:
......@@ -43,7 +43,7 @@ PrefillWorker:
ServiceArgs:
workers: 1
resources:
gpu: 1
gpu: '1'
common-configs: [model, block-size, max-model-len, kv-transfer-config]
Planner:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment