"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "40766ca1b8b0ef92e220595bda96c4336b597e5b"
Unverified Commit 62de76b6 authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

fix: Operator defaults for liveness and readiness probes (#3968)

parent 7b709c41
...@@ -862,7 +862,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -862,7 +862,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Port: intstr.FromString(commonconsts.DynamoSystemPortName), Port: intstr.FromString(commonconsts.DynamoSystemPortName),
}, },
}, },
TimeoutSeconds: 30, TimeoutSeconds: 4,
PeriodSeconds: 5, PeriodSeconds: 5,
SuccessThreshold: 0, SuccessThreshold: 0,
FailureThreshold: 1, FailureThreshold: 1,
...@@ -874,10 +874,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -874,10 +874,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Port: intstr.FromString(commonconsts.DynamoSystemPortName), Port: intstr.FromString(commonconsts.DynamoSystemPortName),
}, },
}, },
TimeoutSeconds: 30, TimeoutSeconds: 4,
PeriodSeconds: 10, PeriodSeconds: 10,
SuccessThreshold: 0, SuccessThreshold: 0,
FailureThreshold: 60, FailureThreshold: 3,
}, },
StartupProbe: &corev1.Probe{ StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
......
...@@ -43,30 +43,27 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co ...@@ -43,30 +43,27 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
container.LivenessProbe = &corev1.Probe{ container.LivenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{ HTTPGet: &corev1.HTTPGetAction{
Path: "/health", Path: "/live",
Port: intstr.FromString(commonconsts.DynamoContainerPortName), Port: intstr.FromString(commonconsts.DynamoContainerPortName),
}, },
}, },
InitialDelaySeconds: 60, InitialDelaySeconds: 15, // Frontend ready to serve requests in ~5-10 seconds
PeriodSeconds: 60, PeriodSeconds: 10,
TimeoutSeconds: 30, TimeoutSeconds: 1, // live endpoint performs no i/o
FailureThreshold: 10, FailureThreshold: 3,
} }
container.ReadinessProbe = &corev1.Probe{ container.ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{ HTTPGet: &corev1.HTTPGetAction{
Command: []string{ Path: "/health",
"/bin/sh", Port: intstr.FromString(commonconsts.DynamoContainerPortName),
"-c",
"curl -s http://localhost:${DYNAMO_PORT}/health | jq -e \".status == \\\"healthy\\\"\"",
},
}, },
}, },
InitialDelaySeconds: 60, InitialDelaySeconds: 10, // Frontend ready to serve requests in ~5-10 seconds
PeriodSeconds: 60, PeriodSeconds: 10,
TimeoutSeconds: 30, TimeoutSeconds: 3,
FailureThreshold: 10, FailureThreshold: 3,
} }
// Add standard environment variables // Add standard environment variables
......
...@@ -42,10 +42,13 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont ...@@ -42,10 +42,13 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
}, },
}, },
PeriodSeconds: 5, PeriodSeconds: 5,
TimeoutSeconds: 30, TimeoutSeconds: 4, // TimeoutSeconds should be < PeriodSeconds
FailureThreshold: 1, FailureThreshold: 1, // Note: the default FailureThreshold is 3; with 1, a single failure will restart the Pod
} }
// In the Dynamo worker context, the ReadinessProbe does not determine whether the worker is ready to receive traffic,
// since worker registration is done through an external KvStore and the transport does not use a Kubernetes Service.
// It is still important for external dependencies that rely on Pod readiness.
container.ReadinessProbe = &corev1.Probe{ container.ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{ HTTPGet: &corev1.HTTPGetAction{
...@@ -54,8 +57,8 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont ...@@ -54,8 +57,8 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
}, },
}, },
PeriodSeconds: 10, PeriodSeconds: 10,
TimeoutSeconds: 30, TimeoutSeconds: 4,
FailureThreshold: 60, FailureThreshold: 3,
} }
container.StartupProbe = &corev1.Probe{ container.StartupProbe = &corev1.Probe{
......
...@@ -1874,7 +1874,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1874,7 +1874,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Port: intstr.FromString(commonconsts.DynamoSystemPortName), Port: intstr.FromString(commonconsts.DynamoSystemPortName),
}, },
}, },
TimeoutSeconds: 30, TimeoutSeconds: 4,
PeriodSeconds: 5, PeriodSeconds: 5,
SuccessThreshold: 0, SuccessThreshold: 0,
FailureThreshold: 1, FailureThreshold: 1,
...@@ -1886,10 +1886,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1886,10 +1886,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Port: intstr.FromString(commonconsts.DynamoSystemPortName), Port: intstr.FromString(commonconsts.DynamoSystemPortName),
}, },
}, },
TimeoutSeconds: 30, TimeoutSeconds: 4,
PeriodSeconds: 10, PeriodSeconds: 10,
SuccessThreshold: 0, SuccessThreshold: 0,
FailureThreshold: 60, FailureThreshold: 3,
}, },
StartupProbe: &corev1.Probe{ StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
...@@ -4549,7 +4549,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) { ...@@ -4549,7 +4549,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
}, },
}, },
PeriodSeconds: 5, PeriodSeconds: 5,
TimeoutSeconds: 30, TimeoutSeconds: 4,
FailureThreshold: 1, FailureThreshold: 1,
}, },
ReadinessProbe: &corev1.Probe{ ReadinessProbe: &corev1.Probe{
...@@ -4560,8 +4560,8 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) { ...@@ -4560,8 +4560,8 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
}, },
}, },
PeriodSeconds: 10, PeriodSeconds: 10,
TimeoutSeconds: 30, TimeoutSeconds: 4,
FailureThreshold: 60, FailureThreshold: 3,
}, },
StartupProbe: &corev1.Probe{ StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{ ProbeHandler: corev1.ProbeHandler{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment