Unverified Commit c68d159f authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

fix(operator): reconcile DynamoGraphDeployment on PodCliqueScalingGroup status changes (#8328)

parent 44190094
...@@ -136,6 +136,15 @@ rules: ...@@ -136,6 +136,15 @@ rules:
- patch - patch
- update - update
- watch - watch
- apiGroups:
- grove.io
resources:
- podcliques
- podcliquescalinggroups
verbs:
- get
- list
- watch
- apiGroups: - apiGroups:
- grove.io - grove.io
resources: resources:
......
...@@ -130,6 +130,8 @@ rules: ...@@ -130,6 +130,8 @@ rules:
- grove.io - grove.io
resources: resources:
- clustertopologies - clustertopologies
- podcliques
- podcliquescalinggroups
verbs: verbs:
- get - get
- list - list
......
...@@ -87,7 +87,9 @@ type DynamoGraphDeploymentReconciler struct { ...@@ -87,7 +87,9 @@ type DynamoGraphDeploymentReconciler struct {
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeploymentscalingadapters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeploymentscalingadapters,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=grove.io,resources=podcliquesets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=grove.io,resources=podcliquesets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=grove.io,resources=podcliques,verbs=get;list;watch
// +kubebuilder:rbac:groups=grove.io,resources=podcliques/scale,verbs=get;update;patch // +kubebuilder:rbac:groups=grove.io,resources=podcliques/scale,verbs=get;update;patch
// +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups,verbs=get;list;watch
// +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups/scale,verbs=get;update;patch // +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups/scale,verbs=get;update;patch
// +kubebuilder:rbac:groups=grove.io,resources=clustertopologies,verbs=get;list;watch // +kubebuilder:rbac:groups=grove.io,resources=clustertopologies,verbs=get;list;watch
// +kubebuilder:rbac:groups=scheduling.run.ai,resources=queues,verbs=get;list // +kubebuilder:rbac:groups=scheduling.run.ai,resources=queues,verbs=get;list
...@@ -1666,8 +1668,6 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err ...@@ -1666,8 +1668,6 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
GenericFunc: func(ge event.GenericEvent) bool { return true }, GenericFunc: func(ge event.GenericEvent) bool { return true },
})). })).
// Watch PodClique resources - only on status changes // Watch PodClique resources - only on status changes
// Note: We don't need to watch PodCliqueScalingGroup because it's just a container
// for PodCliques. The actual status changes happen at the PodClique level.
Watches( Watches(
&grovev1alpha1.PodClique{}, &grovev1alpha1.PodClique{},
handler.EnqueueRequestsFromMapFunc(r.mapPodCliqueToRequests), handler.EnqueueRequestsFromMapFunc(r.mapPodCliqueToRequests),
...@@ -1687,6 +1687,38 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err ...@@ -1687,6 +1687,38 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
}, },
GenericFunc: func(ge event.GenericEvent) bool { return false }, GenericFunc: func(ge event.GenericEvent) bool { return false },
}), }),
).
// Watch PodCliqueScalingGroup resources on status-replica changes.
// PCSG.Status.AvailableReplicas is independently recomputed by the PCSG
// controller and can land after the last PodClique event the DGD
// controller sees. Without this watch, the DGD aggregate
// (CheckPCSGReady reads pcsg.Status.AvailableReplicas) can stay stale
// indefinitely even though the underlying PCSG is already ready.
Watches(
&grovev1alpha1.PodCliqueScalingGroup{},
handler.EnqueueRequestsFromMapFunc(r.mapPodCliqueScalingGroupToRequests),
builder.WithPredicates(predicate.Funcs{
CreateFunc: func(ce event.CreateEvent) bool { return false },
DeleteFunc: func(de event.DeleteEvent) bool { return false },
UpdateFunc: func(ue event.UpdateEvent) bool {
oldPCSG, okOld := ue.ObjectOld.(*grovev1alpha1.PodCliqueScalingGroup)
newPCSG, okNew := ue.ObjectNew.(*grovev1alpha1.PodCliqueScalingGroup)
if !okOld || !okNew {
return false
}
// ObservedGeneration is tracked because CheckPCSGReady uses it as
// a readiness gate ("spec not yet processed" while
// ObservedGeneration < Generation). A PCSG spec edit that does
// not change Spec.Replicas (e.g. template/topology edits) would
// otherwise not wake the DGD when Grove catches up.
return oldPCSG.Status.AvailableReplicas != newPCSG.Status.AvailableReplicas ||
oldPCSG.Status.UpdatedReplicas != newPCSG.Status.UpdatedReplicas ||
oldPCSG.Status.Replicas != newPCSG.Status.Replicas ||
oldPCSG.Spec.Replicas != newPCSG.Spec.Replicas ||
!ptrInt64Equal(oldPCSG.Status.ObservedGeneration, newPCSG.Status.ObservedGeneration)
},
GenericFunc: func(ge event.GenericEvent) bool { return false },
}),
) )
} }
...@@ -1723,3 +1755,47 @@ func (r *DynamoGraphDeploymentReconciler) mapPodCliqueToRequests(ctx context.Con ...@@ -1723,3 +1755,47 @@ func (r *DynamoGraphDeploymentReconciler) mapPodCliqueToRequests(ctx context.Con
}, },
}} }}
} }
// mapPodCliqueScalingGroupToRequests maps a PodCliqueScalingGroup to reconcile
// requests for its owning DGD.
//
// The PCSG is owned by a PodCliqueSet (controller ownerRef), and Dynamo always
// creates the PodCliqueSet with the same name as the DGD
// (see graph.go: gangSet.Name = dynamoDeployment.Name), so the PodCliqueSet
// owner reference name is the DGD name.
func (r *DynamoGraphDeploymentReconciler) mapPodCliqueScalingGroupToRequests(ctx context.Context, obj client.Object) []ctrl.Request {
pcsg, ok := obj.(*grovev1alpha1.PodCliqueScalingGroup)
if !ok {
return nil
}
controllerRef := metav1.GetControllerOf(pcsg)
if controllerRef == nil ||
controllerRef.Kind != "PodCliqueSet" ||
controllerRef.APIVersion != grovev1alpha1.SchemeGroupVersion.String() {
log.FromContext(ctx).V(1).Info("PodCliqueScalingGroup missing PodCliqueSet controller ownerReference",
"podCliqueScalingGroup", pcsg.Name,
"namespace", pcsg.Namespace)
return nil
}
return []ctrl.Request{{
NamespacedName: types.NamespacedName{
Name: controllerRef.Name,
Namespace: pcsg.Namespace,
},
}}
}
// ptrInt64Equal returns true when two *int64 values are equivalent, treating
// nil and a pointer to the same value as equal. Used to compare optional
// status fields like ObservedGeneration without tripping on pointer identity.
func ptrInt64Equal(a, b *int64) bool {
if a == nil && b == nil {
return true
}
if a == nil || b == nil {
return false
}
return *a == *b
}
...@@ -2696,3 +2696,98 @@ func TestPropagateTopologyCondition(t *testing.T) { ...@@ -2696,3 +2696,98 @@ func TestPropagateTopologyCondition(t *testing.T) {
}) })
} }
} }
func TestMapPodCliqueScalingGroupToRequests(t *testing.T) {
tests := []struct {
name string
obj client.Object
wantRequests int
wantName string
wantNs string
}{
{
name: "PCSG with PodCliqueSet controller ownerRef returns DGD request",
obj: &grovev1alpha1.PodCliqueScalingGroup{
ObjectMeta: metav1.ObjectMeta{
Name: "dynamo-recipe-0-worker",
Namespace: "mwieczorek-dsv32-trtllm-agg",
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: grovev1alpha1.SchemeGroupVersion.String(),
Kind: "PodCliqueSet",
Name: "dynamo-recipe",
Controller: ptr.To(true),
},
},
},
},
wantRequests: 1,
wantName: "dynamo-recipe",
wantNs: "mwieczorek-dsv32-trtllm-agg",
},
{
name: "PCSG with no ownerRef returns no requests",
obj: &grovev1alpha1.PodCliqueScalingGroup{
ObjectMeta: metav1.ObjectMeta{
Name: "orphan-pcsg",
Namespace: "default",
},
},
wantRequests: 0,
},
{
name: "PCSG with non-controller PodCliqueSet ownerRef returns no requests",
obj: &grovev1alpha1.PodCliqueScalingGroup{
ObjectMeta: metav1.ObjectMeta{
Name: "pcsg-with-non-controller-ref",
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: grovev1alpha1.SchemeGroupVersion.String(),
Kind: "PodCliqueSet",
Name: "some-pcs",
// Controller flag omitted: metav1.GetControllerOf must ignore this ref.
},
},
},
},
wantRequests: 0,
},
{
name: "PCSG with non-PodCliqueSet ownerRef returns no requests",
obj: &grovev1alpha1.PodCliqueScalingGroup{
ObjectMeta: metav1.ObjectMeta{
Name: "weird-pcsg",
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "not-a-pcs",
},
},
},
},
wantRequests: 0,
},
{
name: "non-PCSG object returns no requests",
obj: &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "default"}},
wantRequests: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
r := &DynamoGraphDeploymentReconciler{}
reqs := r.mapPodCliqueScalingGroupToRequests(context.Background(), tt.obj)
g.Expect(reqs).To(gomega.HaveLen(tt.wantRequests))
if tt.wantRequests == 1 {
g.Expect(reqs[0].Name).To(gomega.Equal(tt.wantName))
g.Expect(reqs[0].Namespace).To(gomega.Equal(tt.wantNs))
}
})
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment