Unverified commit 94d82a48, authored by Julien Mancuso, committed by GitHub
Browse files

fix: correct restart state for parallel restarts (#5949)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
Co-authored-by: tmontfort <tmontfort@nvidia.com>
parent 0c0336e6
...@@ -695,6 +695,16 @@ func (r *DynamoGraphDeploymentReconciler) computeParallelRestartStatus( ...@@ -695,6 +695,16 @@ func (r *DynamoGraphDeploymentReconciler) computeParallelRestartStatus(
} }
// Sort for deterministic output // Sort for deterministic output
sort.Strings(servicesToCheck) sort.Strings(servicesToCheck)
// For a new restart request with services, immediately return Restarting phase without checking readiness.
if len(servicesToCheck) > 0 {
return &nvidiacomv1alpha1.RestartStatus{
ObservedID: specID,
Phase: nvidiacomv1alpha1.RestartPhaseRestarting,
InProgress: servicesToCheck,
}
}
// If no services, fall through to the empty check below
} else if dgd.Status.Restart != nil && len(dgd.Status.Restart.InProgress) > 0 { } else if dgd.Status.Restart != nil && len(dgd.Status.Restart.InProgress) > 0 {
// Continuing existing restart: use current InProgress list // Continuing existing restart: use current InProgress list
servicesToCheck = dgd.Status.Restart.InProgress servicesToCheck = dgd.Status.Restart.InProgress
......
...@@ -1037,6 +1037,49 @@ func Test_computeRestartStatus(t *testing.T) { ...@@ -1037,6 +1037,49 @@ func Test_computeRestartStatus(t *testing.T) {
Phase: v1alpha1.RestartPhaseCompleted, Phase: v1alpha1.RestartPhaseCompleted,
}, },
}, },
{
name: "parallel restart - new request with ready resources should NOT complete immediately (race condition fix)",
dgdSpec: v1alpha1.DynamoGraphDeploymentSpec{
Restart: &v1alpha1.Restart{
ID: newID,
Strategy: &v1alpha1.RestartStrategy{
Type: v1alpha1.RestartStrategyTypeParallel,
},
},
Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{
"frontend": {
Replicas: ptr.To(int32(1)),
},
},
},
dgdStatus: v1alpha1.DynamoGraphDeploymentStatus{
// No existing restart status - brand new restart request
},
existingResources: []client.Object{
// DCD is READY - simulating state BEFORE restart annotation is applied
&v1alpha1.DynamoComponentDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-dgd-frontend",
Namespace: "default",
Generation: 1,
},
Status: v1alpha1.DynamoComponentDeploymentStatus{
ObservedGeneration: 1,
Conditions: []metav1.Condition{
{
Type: v1alpha1.DynamoGraphDeploymentConditionTypeAvailable,
Status: metav1.ConditionTrue,
},
},
},
},
},
wantRestartStatus: &v1alpha1.RestartStatus{
ObservedID: newID,
Phase: v1alpha1.RestartPhaseRestarting, // NOT Completed!
InProgress: []string{"frontend"},
},
},
{ {
name: "Grove pathway - parallel restart complete", name: "Grove pathway - parallel restart complete",
dgdSpec: v1alpha1.DynamoGraphDeploymentSpec{ dgdSpec: v1alpha1.DynamoGraphDeploymentSpec{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment