Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
62de76b6
"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "40766ca1b8b0ef92e220595bda96c4336b597e5b"
Unverified
Commit
62de76b6
authored
Oct 29, 2025
by
Thomas Montfort
Committed by
GitHub
Oct 29, 2025
Browse files
fix: Operator defaults for liveness and readiness probes (#3968)
parent
7b709c41
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
28 additions
and
28 deletions
+28
-28
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
...l/controller/dynamocomponentdeployment_controller_test.go
+3
-3
deploy/cloud/operator/internal/dynamo/component_frontend.go
deploy/cloud/operator/internal/dynamo/component_frontend.go
+12
-15
deploy/cloud/operator/internal/dynamo/component_worker.go
deploy/cloud/operator/internal/dynamo/component_worker.go
+7
-4
deploy/cloud/operator/internal/dynamo/graph_test.go
deploy/cloud/operator/internal/dynamo/graph_test.go
+6
-6
No files found.
deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go
View file @
62de76b6
...
@@ -862,7 +862,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -862,7 +862,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
},
},
},
},
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
PeriodSeconds
:
5
,
PeriodSeconds
:
5
,
SuccessThreshold
:
0
,
SuccessThreshold
:
0
,
FailureThreshold
:
1
,
FailureThreshold
:
1
,
...
@@ -874,10 +874,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
...
@@ -874,10 +874,10 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
},
},
},
},
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
PeriodSeconds
:
10
,
PeriodSeconds
:
10
,
SuccessThreshold
:
0
,
SuccessThreshold
:
0
,
FailureThreshold
:
60
,
FailureThreshold
:
3
,
},
},
StartupProbe
:
&
corev1
.
Probe
{
StartupProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
...
...
deploy/cloud/operator/internal/dynamo/component_frontend.go
View file @
62de76b6
...
@@ -43,30 +43,27 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
...
@@ -43,30 +43,27 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
container
.
LivenessProbe
=
&
corev1
.
Probe
{
container
.
LivenessProbe
=
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Path
:
"/
health
"
,
Path
:
"/
live
"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoContainerPortName
),
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoContainerPortName
),
},
},
},
},
InitialDelaySeconds
:
60
,
InitialDelaySeconds
:
15
,
// Frontend ready to serve requests in ~5-10 seconds
PeriodSeconds
:
6
0
,
PeriodSeconds
:
1
0
,
TimeoutSeconds
:
30
,
TimeoutSeconds
:
1
,
// live endpoint performs no i/o
FailureThreshold
:
10
,
FailureThreshold
:
3
,
}
}
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
Exec
:
&
corev1
.
ExecAction
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
Command
:
[]
string
{
Path
:
"/health"
,
"/bin/sh"
,
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoContainerPortName
),
"-c"
,
"curl -s http://localhost:${DYNAMO_PORT}/health | jq -e
\"
.status ==
\\\"
healthy
\\\"\"
"
,
},
},
},
},
},
InitialDelaySeconds
:
6
0
,
InitialDelaySeconds
:
1
0
,
// Frontend ready to serve requests in ~5-10 seconds
PeriodSeconds
:
6
0
,
PeriodSeconds
:
1
0
,
TimeoutSeconds
:
3
0
,
TimeoutSeconds
:
3
,
FailureThreshold
:
10
,
FailureThreshold
:
3
,
}
}
// Add standard environment variables
// Add standard environment variables
...
...
deploy/cloud/operator/internal/dynamo/component_worker.go
View file @
62de76b6
...
@@ -42,10 +42,13 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
...
@@ -42,10 +42,13 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
},
},
},
},
PeriodSeconds
:
5
,
PeriodSeconds
:
5
,
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
// TimeoutSeconds should be < PeriodSeconds
FailureThreshold
:
1
,
FailureThreshold
:
1
,
// Note: the Kubernetes default FailureThreshold is 3; with 1, a single failure will restart the Pod
}
}
// ReadinessProbe in the Dynamo worker context doesn't determine whether the worker is ready to receive traffic,
// since worker registration is done through an external KvStore and the Transport does not use a Kubernetes Service.
// It is still important for external dependencies that rely on Pod readiness.
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
container
.
ReadinessProbe
=
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
HTTPGet
:
&
corev1
.
HTTPGetAction
{
...
@@ -54,8 +57,8 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
...
@@ -54,8 +57,8 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
},
},
},
},
PeriodSeconds
:
10
,
PeriodSeconds
:
10
,
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
FailureThreshold
:
60
,
FailureThreshold
:
3
,
}
}
container
.
StartupProbe
=
&
corev1
.
Probe
{
container
.
StartupProbe
=
&
corev1
.
Probe
{
...
...
deploy/cloud/operator/internal/dynamo/graph_test.go
View file @
62de76b6
...
@@ -1874,7 +1874,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
...
@@ -1874,7 +1874,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
},
},
},
},
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
PeriodSeconds
:
5
,
PeriodSeconds
:
5
,
SuccessThreshold
:
0
,
SuccessThreshold
:
0
,
FailureThreshold
:
1
,
FailureThreshold
:
1
,
...
@@ -1886,10 +1886,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
...
@@ -1886,10 +1886,10 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
Port
:
intstr
.
FromString
(
commonconsts
.
DynamoSystemPortName
),
},
},
},
},
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
PeriodSeconds
:
10
,
PeriodSeconds
:
10
,
SuccessThreshold
:
0
,
SuccessThreshold
:
0
,
FailureThreshold
:
60
,
FailureThreshold
:
3
,
},
},
StartupProbe
:
&
corev1
.
Probe
{
StartupProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
...
@@ -4549,7 +4549,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
...
@@ -4549,7 +4549,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
},
},
},
},
PeriodSeconds
:
5
,
PeriodSeconds
:
5
,
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
FailureThreshold
:
1
,
FailureThreshold
:
1
,
},
},
ReadinessProbe
:
&
corev1
.
Probe
{
ReadinessProbe
:
&
corev1
.
Probe
{
...
@@ -4560,8 +4560,8 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
...
@@ -4560,8 +4560,8 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
},
},
},
},
PeriodSeconds
:
10
,
PeriodSeconds
:
10
,
TimeoutSeconds
:
30
,
TimeoutSeconds
:
4
,
FailureThreshold
:
60
,
FailureThreshold
:
3
,
},
},
StartupProbe
:
&
corev1
.
Probe
{
StartupProbe
:
&
corev1
.
Probe
{
ProbeHandler
:
corev1
.
ProbeHandler
{
ProbeHandler
:
corev1
.
ProbeHandler
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment