Unverified Commit 62de76b6 authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

fix: Operator defaults for liveness and readiness probes (#3968)

parent 7b709c41
......@@ -862,7 +862,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Port: intstr.FromString(commonconsts.DynamoSystemPortName),
},
},
TimeoutSeconds: 30,
TimeoutSeconds: 4,
PeriodSeconds: 5,
SuccessThreshold: 0,
FailureThreshold: 1,
......@@ -874,10 +874,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Port: intstr.FromString(commonconsts.DynamoSystemPortName),
},
},
TimeoutSeconds: 30,
TimeoutSeconds: 4,
PeriodSeconds: 10,
SuccessThreshold: 0,
FailureThreshold: 60,
FailureThreshold: 3,
},
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
......
......@@ -43,30 +43,27 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
container.LivenessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/health",
Path: "/live",
Port: intstr.FromString(commonconsts.DynamoContainerPortName),
},
},
InitialDelaySeconds: 60,
PeriodSeconds: 60,
TimeoutSeconds: 30,
FailureThreshold: 10,
InitialDelaySeconds: 15, // Frontend ready to serve requests in ~5-10 seconds
PeriodSeconds: 10,
TimeoutSeconds: 1, // live endpoint performs no i/o
FailureThreshold: 3,
}
container.ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
Exec: &corev1.ExecAction{
Command: []string{
"/bin/sh",
"-c",
"curl -s http://localhost:${DYNAMO_PORT}/health | jq -e \".status == \\\"healthy\\\"\"",
},
HTTPGet: &corev1.HTTPGetAction{
Path: "/health",
Port: intstr.FromString(commonconsts.DynamoContainerPortName),
},
},
InitialDelaySeconds: 60,
PeriodSeconds: 60,
TimeoutSeconds: 30,
FailureThreshold: 10,
InitialDelaySeconds: 10, // Frontend ready to serve requests in ~5-10 seconds
PeriodSeconds: 10,
TimeoutSeconds: 3,
FailureThreshold: 3,
}
// Add standard environment variables
......
......@@ -42,10 +42,13 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
},
},
PeriodSeconds: 5,
TimeoutSeconds: 30,
FailureThreshold: 1,
TimeoutSeconds: 4, // TimeoutSeconds should be < PeriodSeconds
FailureThreshold: 1, // Note: the Kubernetes default FailureThreshold is 3; with 1, a single failure will restart the Pod
}
// In the Dynamo worker context, the ReadinessProbe does not determine whether the worker is ready to receive traffic,
// since worker registration is done through an external KvStore and the Transport does not use a Kubernetes Service.
// It is still important for external dependencies that rely on Pod readiness.
container.ReadinessProbe = &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
......@@ -54,8 +57,8 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
},
},
PeriodSeconds: 10,
TimeoutSeconds: 30,
FailureThreshold: 60,
TimeoutSeconds: 4,
FailureThreshold: 3,
}
container.StartupProbe = &corev1.Probe{
......
......@@ -1874,7 +1874,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Port: intstr.FromString(commonconsts.DynamoSystemPortName),
},
},
TimeoutSeconds: 30,
TimeoutSeconds: 4,
PeriodSeconds: 5,
SuccessThreshold: 0,
FailureThreshold: 1,
......@@ -1886,10 +1886,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Port: intstr.FromString(commonconsts.DynamoSystemPortName),
},
},
TimeoutSeconds: 30,
TimeoutSeconds: 4,
PeriodSeconds: 10,
SuccessThreshold: 0,
FailureThreshold: 60,
FailureThreshold: 3,
},
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
......@@ -4549,7 +4549,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
},
},
PeriodSeconds: 5,
TimeoutSeconds: 30,
TimeoutSeconds: 4,
FailureThreshold: 1,
},
ReadinessProbe: &corev1.Probe{
......@@ -4560,8 +4560,8 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
},
},
PeriodSeconds: 10,
TimeoutSeconds: 30,
FailureThreshold: 60,
TimeoutSeconds: 4,
FailureThreshold: 3,
},
StartupProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment