Commit 044e12e1 authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

fix: fix dynamoNimDeployment status (#551)

parent 0ff6f53d
...@@ -89,7 +89,7 @@ envsubst '${NAMESPACE} ${NGC_TOKEN} ${CI_COMMIT_SHA} ${RELEASE_NAME} ${DYNAMO_IN ...@@ -89,7 +89,7 @@ envsubst '${NAMESPACE} ${NGC_TOKEN} ${CI_COMMIT_SHA} ${RELEASE_NAME} ${DYNAMO_IN
echo "" echo ""
echo "Generated values file saved as generated-values.yaml" echo "Generated values file saved as generated-values.yaml"
Build dependencies before installation # Build dependencies before installation
echo "Building helm dependencies..." echo "Building helm dependencies..."
cd platform cd platform
retry_command "$HELM_CMD dep build" 5 5 retry_command "$HELM_CMD dep build" 5 5
......
...@@ -19,6 +19,7 @@ package controller ...@@ -19,6 +19,7 @@ package controller
import ( import (
"context" "context"
"fmt"
"strings" "strings"
"dario.cat/mergo" "dario.cat/mergo"
...@@ -70,6 +71,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -70,6 +71,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
var err error var err error
reason := "undefined" reason := "undefined"
message := ""
readyStatus := metav1.ConditionFalse readyStatus := metav1.ConditionFalse
// retrieve the CRD // retrieve the CRD
dynamoDeployment := &nvidiacomv1alpha1.DynamoDeployment{} dynamoDeployment := &nvidiacomv1alpha1.DynamoDeployment{}
...@@ -82,7 +84,6 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -82,7 +84,6 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
} }
defer func() { defer func() {
message := ""
if err != nil { if err != nil {
dynamoDeployment.SetState(FailedState) dynamoDeployment.SetState(FailedState)
message = err.Error() message = err.Error()
...@@ -149,7 +150,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -149,7 +150,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, err return ctrl.Result{}, err
} }
allAreReady := true notReadyDeployments := []string{}
// reconcile the DynamoNimDeployments // reconcile the DynamoNimDeployments
for serviceName, dynamoNimDeployment := range dynamoNimDeployments { for serviceName, dynamoNimDeployment := range dynamoNimDeployments {
logger.Info("Reconciling the DynamoNimDeployment", "serviceName", serviceName, "dynamoNimDeployment", dynamoNimDeployment) logger.Info("Reconciling the DynamoNimDeployment", "serviceName", serviceName, "dynamoNimDeployment", dynamoNimDeployment)
...@@ -163,13 +164,17 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -163,13 +164,17 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, err return ctrl.Result{}, err
} }
if !dynamoNimDeployment.Status.IsReady() { if !dynamoNimDeployment.Status.IsReady() {
allAreReady = false notReadyDeployments = append(notReadyDeployments, dynamoNimDeployment.Name)
} }
} }
if allAreReady { if len(notReadyDeployments) == 0 {
dynamoDeployment.SetState(ReadyState) dynamoDeployment.SetState(ReadyState)
reason = "all_deployments_are_ready"
message = "All deployments are ready"
readyStatus = metav1.ConditionTrue readyStatus = metav1.ConditionTrue
} else { } else {
reason = "some_deployments_are_not_ready"
message = fmt.Sprintf("The following deployments are not ready: %v", notReadyDeployments)
dynamoDeployment.SetState(PendingState) dynamoDeployment.SetState(PendingState)
} }
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 Atalaya Tech. Inc
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
*/
package controller
import (
"testing"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
func TestIsDeploymentReady(t *testing.T) {
type args struct {
deployment *appsv1.Deployment
}
tests := []struct {
name string
args args
want bool
}{
{
name: "deployment is nil",
args: args{
deployment: nil,
},
want: false,
},
{
name: "not ready",
args: args{
deployment: &appsv1.Deployment{
Spec: appsv1.DeploymentSpec{},
Status: appsv1.DeploymentStatus{
Conditions: []appsv1.DeploymentCondition{
{
Type: appsv1.DeploymentAvailable,
Status: corev1.ConditionFalse,
},
},
},
},
},
want: false,
},
{
name: "not ready (paused)",
args: args{
deployment: &appsv1.Deployment{
Spec: appsv1.DeploymentSpec{
Paused: true,
},
},
},
want: false,
},
{
name: "ready",
args: args{
deployment: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Generation: 1,
},
Spec: appsv1.DeploymentSpec{
Replicas: &[]int32{1}[0],
},
Status: appsv1.DeploymentStatus{
ObservedGeneration: 1,
UpdatedReplicas: 1,
AvailableReplicas: 1,
Conditions: []appsv1.DeploymentCondition{
{
Type: appsv1.DeploymentAvailable,
Status: corev1.ConditionTrue,
},
},
},
},
},
want: true,
},
{
name: "ready (no desired replicas)",
args: args{
deployment: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Generation: 1,
},
Spec: appsv1.DeploymentSpec{
Replicas: &[]int32{0}[0],
},
},
},
want: true,
},
{
name: "not ready (condition false)",
args: args{
deployment: &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Generation: 1,
},
Spec: appsv1.DeploymentSpec{
Replicas: &[]int32{1}[0],
},
Status: appsv1.DeploymentStatus{
ObservedGeneration: 1,
UpdatedReplicas: 1,
AvailableReplicas: 1,
Conditions: []appsv1.DeploymentCondition{
{
Type: appsv1.DeploymentAvailable,
Status: corev1.ConditionFalse,
},
},
},
},
},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := IsDeploymentReady(tt.args.deployment); got != tt.want {
t.Errorf("IsDeploymentReady() = %v, want %v", got, tt.want)
}
})
}
}
...@@ -80,8 +80,8 @@ func SyncResource[T Resource](ctx context.Context, c client.Client, desired T, n ...@@ -80,8 +80,8 @@ func SyncResource[T Resource](ctx context.Context, c client.Client, desired T, n
// Check if the Spec has changed and update if necessary // Check if the Spec has changed and update if necessary
if IsSpecChanged(current, desired) { if IsSpecChanged(current, desired) {
// update the spec of the current object with the desired spec // update the spec of the current object with the desired spec
current.SetSpec(desired.GetSpec()) desired.SetResourceVersion(current.GetResourceVersion())
if err := c.Update(ctx, current); err != nil { if err := c.Update(ctx, desired); err != nil {
return desired, fmt.Errorf("failed to update resource: %w", err) return desired, fmt.Errorf("failed to update resource: %w", err)
} }
} }
......
...@@ -258,11 +258,13 @@ func GenerateDynamoNIMDeployments(parentDynamoDeployment *v1alpha1.DynamoDeploym ...@@ -258,11 +258,13 @@ func GenerateDynamoNIMDeployments(parentDynamoDeployment *v1alpha1.DynamoDeploym
}, },
} }
} }
deployment.Spec.Autoscaling = &v1alpha1.Autoscaling{
Enabled: false,
}
if service.Config.Autoscaling != nil { if service.Config.Autoscaling != nil {
deployment.Spec.Autoscaling = &v1alpha1.Autoscaling{ deployment.Spec.Autoscaling.Enabled = true
MinReplicas: service.Config.Autoscaling.MinReplicas, deployment.Spec.Autoscaling.MinReplicas = service.Config.Autoscaling.MinReplicas
MaxReplicas: service.Config.Autoscaling.MaxReplicas, deployment.Spec.Autoscaling.MaxReplicas = service.Config.Autoscaling.MaxReplicas
}
} }
deployments[service.Name] = deployment deployments[service.Name] = deployment
} }
......
...@@ -110,6 +110,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -110,6 +110,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
}, },
}, },
Autoscaling: &v1alpha1.Autoscaling{ Autoscaling: &v1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 1, MinReplicas: 1,
MaxReplicas: 5, MaxReplicas: 5,
}, },
...@@ -130,6 +131,9 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -130,6 +131,9 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
DynamoNim: "dynamonim--ac4e234", DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService1", DynamoTag: "dynamonim:MyService1",
ServiceName: "service2", ServiceName: "service2",
Autoscaling: &v1alpha1.Autoscaling{
Enabled: false,
},
}, },
}, },
}, },
...@@ -206,6 +210,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -206,6 +210,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
}, },
}, },
Autoscaling: &v1alpha1.Autoscaling{ Autoscaling: &v1alpha1.Autoscaling{
Enabled: true,
MinReplicas: 1, MinReplicas: 1,
MaxReplicas: 5, MaxReplicas: 5,
}, },
...@@ -230,6 +235,9 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -230,6 +235,9 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
DynamoNim: "dynamonim--ac4e234", DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService2", DynamoTag: "dynamonim:MyService2",
ServiceName: "service2", ServiceName: "service2",
Autoscaling: &v1alpha1.Autoscaling{
Enabled: false,
},
}, },
}, },
}, },
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment