Unverified Commit 7d3c67f0 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

feat: use consts instead of hardcoded string for statuses (#5696)


Signed-off-by: default avatarHannah Zhang <hannahz@nvidia.com>
parent 36ce39bd
......@@ -61,9 +61,9 @@ type Reason string
type Message string
const (
FailedState State = "failed"
ReadyState State = "successful"
PendingState State = "pending"
DGDStateFailed State = "failed"
DGDStateReady State = "successful"
DGDStatePending State = "pending"
)
type etcdStorage interface {
......@@ -109,7 +109,7 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
reason := Reason("undefined")
message := Message("")
state := PendingState
state := DGDStatePending
// retrieve the CRD
dynamoDeployment := &nvidiacomv1alpha1.DynamoGraphDeployment{}
if err = r.Get(ctx, req.NamespacedName, dynamoDeployment); err != nil {
......@@ -124,14 +124,14 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
}
if err != nil {
state = FailedState
state = DGDStateFailed
message = Message(err.Error())
logger.Error(err, "Reconciliation failed")
}
dynamoDeployment.SetState(string(state))
readyStatus := metav1.ConditionFalse
if state == ReadyState {
if state == DGDStateReady {
readyStatus = metav1.ConditionTrue
}
......@@ -173,7 +173,7 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
logger.Error(validationErr, "DynamoGraphDeployment validation failed, refusing to reconcile")
// Set validation error state and reason (defer will update status)
state = FailedState
state = DGDStateFailed
reason = Reason("ValidationFailed")
message = Message(fmt.Sprintf("Validation failed: %v", validationErr))
......@@ -903,14 +903,14 @@ func (r *DynamoGraphDeploymentReconciler) checkResourcesReadiness(resources []Re
if len(notReadyResources) == 0 {
return ReconcileResult{
State: ReadyState,
State: DGDStateReady,
Reason: "all_resources_are_ready",
Message: Message("All resources are ready"),
ServiceStatus: serviceStatuses,
}
}
return ReconcileResult{
State: PendingState,
State: DGDStatePending,
Reason: "some_resources_are_not_ready",
Message: Message(fmt.Sprintf("Resources not ready: %s", strings.Join(notReadyReasons, "; "))),
ServiceStatus: serviceStatuses,
......
......@@ -405,7 +405,7 @@ func Test_reconcileGroveResources(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: ReadyState,
State: DGDStateReady,
Reason: "all_resources_are_ready",
Message: "All resources are ready",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -467,7 +467,7 @@ func Test_reconcileGroveResources(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: PendingState,
State: DGDStatePending,
Reason: "some_resources_are_not_ready",
Message: Message("Resources not ready: test-dgd: podclique/test-dgd-0-decode: desired=2, ready=1"),
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -542,7 +542,7 @@ func Test_reconcileGroveResources(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: ReadyState,
State: DGDStateReady,
Reason: "all_resources_are_ready",
Message: "All resources are ready",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -614,7 +614,7 @@ func Test_reconcileGroveResources(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: PendingState,
State: DGDStatePending,
Reason: "some_resources_are_not_ready",
Message: Message("Resources not ready: test-dgd: pcsg/test-dgd-0-aggregated: desired=2, available=1"),
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -1402,7 +1402,7 @@ func Test_reconcileDynamoComponentsDeployments(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: ReadyState,
State: DGDStateReady,
Reason: "all_resources_are_ready",
Message: "All resources are ready",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -1462,7 +1462,7 @@ func Test_reconcileDynamoComponentsDeployments(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: PendingState,
State: DGDStatePending,
Reason: "some_resources_are_not_ready",
Message: "Resources not ready: test-dgd-frontend: Component deployment not ready - Available condition not true",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -1592,7 +1592,7 @@ func Test_reconcileDynamoComponentsDeployments(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: ReadyState,
State: DGDStateReady,
Reason: "all_resources_are_ready",
Message: "All resources are ready",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -1738,7 +1738,7 @@ func Test_reconcileDynamoComponentsDeployments(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: PendingState,
State: DGDStatePending,
Reason: "some_resources_are_not_ready",
Message: "Resources not ready: test-dgd-decode: Component deployment not ready - Available condition not true",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......@@ -1849,7 +1849,7 @@ func Test_reconcileDynamoComponentsDeployments(t *testing.T) {
},
},
wantReconcileResult: ReconcileResult{
State: PendingState,
State: DGDStatePending,
Reason: "some_resources_are_not_ready",
Message: "Resources not ready: test-dgd-decode: Component deployment not ready - Available condition not true; test-dgd-frontend: Component deployment not ready - Available condition not true",
ServiceStatus: map[string]v1alpha1.ServiceReplicaStatus{
......
......@@ -52,14 +52,14 @@ import (
)
const (
// State constants
StateEmpty = ""
StatePending = "Pending"
StateProfiling = "Profiling"
StateDeploying = "Deploying"
StateReady = "Ready"
StateDeploymentDeleted = "DeploymentDeleted"
StateFailed = "Failed"
// DGDR state constants
DGDRStateEmpty = ""
DGDRStatePending = "Pending"
DGDRStateProfiling = "Profiling"
DGDRStateDeploying = "Deploying"
DGDRStateReady = "Ready"
DGDRStateDeploymentDeleted = "DeploymentDeleted"
DGDRStateFailed = "Failed"
// Condition types
ConditionTypeValidation = "Validation"
......@@ -297,8 +297,8 @@ func (r *DynamoGraphDeploymentRequestReconciler) Reconcile(ctx context.Context,
// Check for spec changes (immutability enforcement)
if dgdr.Status.ObservedGeneration > 0 && dgdr.Status.ObservedGeneration != dgdr.Generation {
// Spec changed after initial processing
if dgdr.Status.State == StateProfiling || dgdr.Status.State == StateDeploying ||
dgdr.Status.State == StateReady || dgdr.Status.State == StateDeploymentDeleted {
if dgdr.Status.State == DGDRStateProfiling || dgdr.Status.State == DGDRStateDeploying ||
dgdr.Status.State == DGDRStateReady || dgdr.Status.State == DGDRStateDeploymentDeleted {
logger.Info("Spec change detected in immutable state",
"state", dgdr.Status.State,
"observedGeneration", dgdr.Status.ObservedGeneration,
......@@ -314,23 +314,23 @@ func (r *DynamoGraphDeploymentRequestReconciler) Reconcile(ctx context.Context,
}
// State machine: handle different states
switch dgdr.Status.State {
case StateEmpty:
case DGDRStateEmpty:
return r.handleInitialState(ctx, dgdr)
case StatePending:
case DGDRStatePending:
return r.handlePendingState(ctx, dgdr)
case StateProfiling:
case DGDRStateProfiling:
return r.handleProfilingState(ctx, dgdr)
case StateDeploying:
case DGDRStateDeploying:
return r.handleDeployingState(ctx, dgdr)
case StateReady:
case DGDRStateReady:
return r.handleReadyState(ctx, dgdr)
case StateDeploymentDeleted:
case DGDRStateDeploymentDeleted:
return r.handleDeploymentDeletedState(ctx, dgdr)
case StateFailed:
case DGDRStateFailed:
return r.handleFailedState(ctx, dgdr)
default:
logger.Info("Unknown state", "state", dgdr.Status.State)
return r.updateStateAndRequeue(ctx, dgdr, StateFailed, MessageInvalidState)
return r.updateStateAndRequeue(ctx, dgdr, DGDRStateFailed, MessageInvalidState)
}
}
......@@ -342,7 +342,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleInitialState(ctx context.
// Validate the spec
if err := r.validateSpec(ctx, dgdr); err != nil {
r.Recorder.Event(dgdr, corev1.EventTypeWarning, EventReasonValidationFailed, err.Error())
return r.updateStateWithCondition(ctx, dgdr, StateFailed, ConditionTypeValidation, metav1.ConditionFalse, EventReasonValidationFailed, err.Error())
return r.updateStateWithCondition(ctx, dgdr, DGDRStateFailed, ConditionTypeValidation, metav1.ConditionFalse, EventReasonValidationFailed, err.Error())
}
// Set observedGeneration to track the spec we're processing
......@@ -353,7 +353,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleInitialState(ctx context.
// Initialize status
r.Recorder.Event(dgdr, corev1.EventTypeNormal, EventReasonInitialized, MessageInitialized)
return r.updateStateAndRequeue(ctx, dgdr, StatePending, MessageInitialized)
return r.updateStateAndRequeue(ctx, dgdr, DGDRStatePending, MessageInitialized)
}
// handlePendingState starts the profiling process
......@@ -364,7 +364,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handlePendingState(ctx context.
// Create profiling job (online or AIC)
if err := r.createProfilingJob(ctx, dgdr); err != nil {
r.Recorder.Event(dgdr, corev1.EventTypeWarning, EventReasonProfilingJobFailed, err.Error())
return r.updateStateWithCondition(ctx, dgdr, StateFailed, ConditionTypeProfiling, metav1.ConditionFalse, MessageJobCreationFailed, err.Error())
return r.updateStateWithCondition(ctx, dgdr, DGDRStateFailed, ConditionTypeProfiling, metav1.ConditionFalse, MessageJobCreationFailed, err.Error())
}
// Record event with appropriate message
......@@ -375,7 +375,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handlePendingState(ctx context.
}
// Update to Profiling state with Running status
return r.updateStateWithCondition(ctx, dgdr, StateProfiling, ConditionTypeProfiling, metav1.ConditionFalse, "ProfilingRunning", MessageProfilingInProgress)
return r.updateStateWithCondition(ctx, dgdr, DGDRStateProfiling, ConditionTypeProfiling, metav1.ConditionFalse, "ProfilingRunning", MessageProfilingInProgress)
}
// handleProfilingState monitors profiling progress and generates spec when complete
......@@ -389,7 +389,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleProfilingState(ctx contex
if err != nil {
r.Recorder.Event(dgdr, corev1.EventTypeWarning, MessageProfilingCheckFailed, err.Error())
// Job failed - transition to Failed state
return r.updateStateWithCondition(ctx, dgdr, StateFailed, ConditionTypeProfiling, metav1.ConditionFalse, "ProfilingFailed", err.Error())
return r.updateStateWithCondition(ctx, dgdr, DGDRStateFailed, ConditionTypeProfiling, metav1.ConditionFalse, "ProfilingFailed", err.Error())
}
if !completed {
......@@ -410,7 +410,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleProfilingState(ctx contex
// Retrieve profiling results and generate spec
if err := r.generateDGDSpec(ctx, dgdr); err != nil {
r.Recorder.Event(dgdr, corev1.EventTypeWarning, MessageGenerationFailed, err.Error())
return r.updateStateWithCondition(ctx, dgdr, StateFailed, ConditionTypeSpecGenerated, metav1.ConditionFalse, MessageGenerationFailed, err.Error())
return r.updateStateWithCondition(ctx, dgdr, DGDRStateFailed, ConditionTypeSpecGenerated, metav1.ConditionFalse, MessageGenerationFailed, err.Error())
}
// Record spec generation event
......@@ -432,11 +432,11 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleProfilingState(ctx contex
// If autoApply is enabled, transition to Deploying state
if dgdr.Spec.AutoApply {
logger.Info("AutoApply enabled, transitioning to Deploying state")
return r.updateStateWithCondition(ctx, dgdr, StateDeploying, ConditionTypeSpecGenerated, metav1.ConditionTrue, EventReasonSpecGenerated, MessageSpecGenerated)
return r.updateStateWithCondition(ctx, dgdr, DGDRStateDeploying, ConditionTypeSpecGenerated, metav1.ConditionTrue, EventReasonSpecGenerated, MessageSpecGenerated)
}
// Otherwise, transition to Ready state
return r.updateStateWithCondition(ctx, dgdr, StateReady, ConditionTypeSpecGenerated, metav1.ConditionTrue, EventReasonSpecGenerated, MessageSpecAvailable)
return r.updateStateWithCondition(ctx, dgdr, DGDRStateReady, ConditionTypeSpecGenerated, metav1.ConditionTrue, EventReasonSpecGenerated, MessageSpecAvailable)
}
// handleReadyState handles DGDR in Ready state
......@@ -469,11 +469,11 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleReadyState(ctx context.Co
dgdr.Status.Deployment.State = dgd.Status.State
// Check if DGD degraded from Ready
if dgd.Status.State != "Ready" {
if dgd.Status.State != string(DGDStateReady) {
logger.Info("DGD degraded, transitioning back to Deploying",
"dgdState", dgd.Status.State)
dgdr.Status.State = StateDeploying
dgdr.Status.State = DGDRStateDeploying
r.Recorder.Event(dgdr, corev1.EventTypeWarning, EventReasonDeploymentDegraded,
fmt.Sprintf(MessageDeploymentDegraded, dgd.Name, dgd.Status.State))
......@@ -497,7 +497,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleDeployingState(ctx contex
if !dgdr.Spec.AutoApply {
// Shouldn't be in this state without autoApply
logger.Info("AutoApply not enabled, transitioning to Ready")
dgdr.Status.State = StateReady
dgdr.Status.State = DGDRStateReady
return ctrl.Result{}, r.Status().Update(ctx, dgdr)
}
......@@ -526,9 +526,9 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleDeployingState(ctx contex
dgdr.Status.Deployment.State = dgd.Status.State
// Check if DGD is Ready
if dgd.Status.State == "Ready" {
if dgd.Status.State == string(DGDStateReady) {
logger.Info("DGD is Ready, transitioning to Ready state")
dgdr.Status.State = StateReady
dgdr.Status.State = DGDRStateReady
r.Recorder.Event(dgdr, corev1.EventTypeNormal, EventReasonDeploymentReady,
fmt.Sprintf(MessageDeploymentReady, dgd.Name))
......@@ -556,8 +556,8 @@ func (r *DynamoGraphDeploymentRequestReconciler) handleDGDDeleted(ctx context.Co
logger := log.FromContext(ctx)
logger.Info("DGD was deleted by user, transitioning to DeploymentDeleted state")
dgdr.Status.State = StateDeploymentDeleted
dgdr.Status.Deployment.State = "Deleted"
dgdr.Status.State = DGDRStateDeploymentDeleted
dgdr.Status.Deployment.State = ""
r.Recorder.Event(dgdr, corev1.EventTypeWarning, EventReasonDeploymentDeleted,
fmt.Sprintf(MessageDeploymentDeleted, dgdr.Status.Deployment.Name))
......@@ -669,7 +669,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createDGD(ctx context.Context,
dgdr.Status.Deployment = &nvidiacomv1alpha1.DeploymentStatus{
Name: dgdName,
Namespace: dgdNamespace,
State: "Pending",
State: string(DGDStatePending),
Created: true,
}
return ctrl.Result{}, r.Status().Update(ctx, dgdr)
......@@ -682,7 +682,7 @@ func (r *DynamoGraphDeploymentRequestReconciler) createDGD(ctx context.Context,
dgdr.Status.Deployment = &nvidiacomv1alpha1.DeploymentStatus{
Name: dgdName,
Namespace: dgdNamespace,
State: "Pending",
State: string(DGDStatePending),
Created: true,
}
......
......@@ -140,7 +140,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
_ = k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
return updated.Status.State
}, timeout, interval).Should(Equal(StatePending))
}, timeout, interval).Should(Equal(DGDRStatePending))
// Verify observedGeneration is set
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
......@@ -190,7 +190,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
_ = k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)
return updated.Status.State
}, timeout, interval).Should(Equal(StatePending))
}, timeout, interval).Should(Equal(DGDRStatePending))
})
})
......@@ -424,7 +424,7 @@ var _ = Describe("DynamoGraphDeploymentRequest Controller", func() {
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Update status to Profiling using Status subresource
dgdr.Status.State = StateProfiling
dgdr.Status.State = DGDRStateProfiling
Expect(k8sClient.Status().Update(ctx, dgdr)).Should(Succeed())
// Create completed profiling job
......@@ -499,7 +499,7 @@ spec:
Expect(updated.Status.GeneratedDeployment).NotTo(BeNil())
// Verify state transitioned to Ready (since autoApply is false by default)
Expect(updated.Status.State).Should(Equal(StateReady))
Expect(updated.Status.State).Should(Equal(DGDRStateReady))
})
})
......@@ -539,7 +539,7 @@ spec:
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Update status to Profiling using Status subresource
dgdr.Status.State = StateProfiling
dgdr.Status.State = DGDRStateProfiling
Expect(k8sClient.Status().Update(ctx, dgdr)).Should(Succeed())
// Create completed profiling job
......@@ -609,7 +609,7 @@ spec:
// Get updated DGDR and check state is Deploying
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)).Should(Succeed())
Expect(updated.Status.State).Should(Equal(StateDeploying))
Expect(updated.Status.State).Should(Equal(DGDRStateDeploying))
// Reconcile again to create DGD
_, err = reconciler.Reconcile(ctx, reconcile.Request{
......@@ -680,7 +680,7 @@ spec:
observedGeneration := current.Status.ObservedGeneration
// Manually set state to Profiling to simulate in-progress profiling
current.Status.State = StateProfiling
current.Status.State = DGDRStateProfiling
Expect(k8sClient.Status().Update(ctx, &current)).Should(Succeed())
// Try to modify spec
......@@ -702,7 +702,7 @@ spec:
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &current)).Should(Succeed())
Expect(current.Generation).Should(BeNumerically(">", initialGeneration))
Expect(current.Status.ObservedGeneration).Should(Equal(observedGeneration))
Expect(current.Status.State).Should(Equal(StateProfiling)) // State unchanged
Expect(current.Status.State).Should(Equal(DGDRStateProfiling)) // State unchanged
// Verify event was recorded
Eventually(func() bool {
......@@ -752,12 +752,12 @@ spec:
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Update status to Ready with Deployment info using Status subresource
dgdr.Status.State = StateReady
dgdr.Status.State = DGDRStateReady
dgdr.Status.Deployment = &nvidiacomv1alpha1.DeploymentStatus{
Name: "test-dgd-to-delete",
Namespace: namespace,
Created: true,
State: "Ready",
State: DGDRStateReady,
}
Expect(k8sClient.Status().Update(ctx, dgdr)).Should(Succeed())
......@@ -770,7 +770,7 @@ spec:
// Get updated DGDR and check state transitioned to DeploymentDeleted
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)).Should(Succeed())
Expect(updated.Status.State).Should(Equal(StateDeploymentDeleted))
Expect(updated.Status.State).Should(Equal(DGDRStateDeploymentDeleted))
})
})
})
......@@ -1249,7 +1249,7 @@ var _ = Describe("DGDR Error Handling", func() {
defer func() { _ = k8sClient.Delete(ctx, dgdr) }()
// Set status to Profiling
dgdr.Status.State = StateProfiling
dgdr.Status.State = DGDRStateProfiling
Expect(k8sClient.Status().Update(ctx, dgdr)).Should(Succeed())
// Create failed job
......@@ -1331,7 +1331,7 @@ var _ = Describe("DGDR Error Handling", func() {
// Verify DGDR transitioned to Failed state
var updated nvidiacomv1alpha1.DynamoGraphDeploymentRequest
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: dgdrName, Namespace: namespace}, &updated)).Should(Succeed())
Expect(updated.Status.State).Should(Equal(StateFailed))
Expect(updated.Status.State).Should(Equal(DGDRStateFailed))
// Verify error condition contains detailed error
condition := meta.FindStatusCondition(updated.Status.Conditions, ConditionTypeProfiling)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment