Unverified Commit d047851e authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

feat(operator): propagate DGD spec-level annotations and labels to child resources (#7326)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent ae5d7ba9
...@@ -57,6 +57,13 @@ spec: ...@@ -57,6 +57,13 @@ spec:
spec: spec:
description: Spec defines the desired state for this graph deployment. description: Spec defines the desired state for this graph deployment.
properties: properties:
annotations:
additionalProperties:
type: string
description: |-
Annotations to propagate to all child resources (PCS, DCD, Deployments, and pod templates).
Service-level annotations take precedence over these values.
type: object
backendFramework: backendFramework:
description: BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm"). description: BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm").
enum: enum:
...@@ -213,6 +220,13 @@ spec: ...@@ -213,6 +220,13 @@ spec:
- name - name
type: object type: object
type: array type: array
labels:
additionalProperties:
type: string
description: |-
Labels to propagate to all child resources (PCS, DCD, Deployments, and pod templates).
Service-level labels take precedence over these values.
type: object
pvcs: pvcs:
description: |- description: |-
PVCs defines a list of persistent volume claims that can be referenced by components. PVCs defines a list of persistent volume claims that can be referenced by components.
......
...@@ -56,6 +56,14 @@ const ( ...@@ -56,6 +56,14 @@ const (
// DynamoGraphDeploymentSpec defines the desired state of DynamoGraphDeployment. // DynamoGraphDeploymentSpec defines the desired state of DynamoGraphDeployment.
type DynamoGraphDeploymentSpec struct { type DynamoGraphDeploymentSpec struct {
// Annotations to propagate to all child resources (PCS, DCD, Deployments, and pod templates).
// Service-level annotations take precedence over these values.
// +optional
Annotations map[string]string `json:"annotations,omitempty"`
// Labels to propagate to all child resources (PCS, DCD, Deployments, and pod templates).
// Service-level labels take precedence over these values.
// +optional
Labels map[string]string `json:"labels,omitempty"`
// PVCs defines a list of persistent volume claims that can be referenced by components. // PVCs defines a list of persistent volume claims that can be referenced by components.
// Each PVC must have a unique name that can be referenced in component specifications. // Each PVC must have a unique name that can be referenced in component specifications.
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
......
...@@ -902,6 +902,20 @@ func (in *DynamoGraphDeploymentServiceRef) DeepCopy() *DynamoGraphDeploymentServ ...@@ -902,6 +902,20 @@ func (in *DynamoGraphDeploymentServiceRef) DeepCopy() *DynamoGraphDeploymentServ
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec) { func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec) {
*out = *in *out = *in
if in.Annotations != nil {
in, out := &in.Annotations, &out.Annotations
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.Labels != nil {
in, out := &in.Labels, &out.Labels
*out = make(map[string]string, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.PVCs != nil { if in.PVCs != nil {
in, out := &in.PVCs, &out.PVCs in, out := &in.PVCs, &out.PVCs
*out = make([]PVC, len(*in)) *out = make([]PVC, len(*in))
......
...@@ -57,6 +57,13 @@ spec: ...@@ -57,6 +57,13 @@ spec:
spec: spec:
description: Spec defines the desired state for this graph deployment. description: Spec defines the desired state for this graph deployment.
properties: properties:
annotations:
additionalProperties:
type: string
description: |-
Annotations to propagate to all child resources (PCS, DCD, Deployments, and pod templates).
Service-level annotations take precedence over these values.
type: object
backendFramework: backendFramework:
description: BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm"). description: BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm").
enum: enum:
...@@ -213,6 +220,13 @@ spec: ...@@ -213,6 +220,13 @@ spec:
- name - name
type: object type: object
type: array type: array
labels:
additionalProperties:
type: string
description: |-
Labels to propagate to all child resources (PCS, DCD, Deployments, and pod templates).
Service-level labels take precedence over these values.
type: object
pvcs: pvcs:
description: |- description: |-
PVCs defines a list of persistent volume claims that can be referenced by components. PVCs defines a list of persistent volume claims that can be referenced by components.
......
...@@ -322,11 +322,12 @@ func generateSingleDCD( ...@@ -322,11 +322,12 @@ func generateSingleDCD(
deployment.Spec.DynamoNamespace = &dynamoNamespace deployment.Spec.DynamoNamespace = &dynamoNamespace
labels := make(map[string]string) labels := make(map[string]string)
deployment.Spec.Labels = labels maps.Copy(labels, component.Labels)
deployment.Labels = labels
labels[commonconsts.KubeLabelDynamoComponent] = componentName labels[commonconsts.KubeLabelDynamoComponent] = componentName
labels[commonconsts.KubeLabelDynamoNamespace] = dynamoNamespace labels[commonconsts.KubeLabelDynamoNamespace] = dynamoNamespace
labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = parentDGD.Name labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = parentDGD.Name
deployment.Spec.Labels = labels
deployment.Labels = labels
// only label worker DCDs with their hash for cleanup during rolling updates // only label worker DCDs with their hash for cleanup during rolling updates
if IsWorkerComponent(component.ComponentType) { if IsWorkerComponent(component.ComponentType) {
...@@ -334,6 +335,7 @@ func generateSingleDCD( ...@@ -334,6 +335,7 @@ func generateSingleDCD(
} }
propagateDGDAnnotations(parentDGD.GetAnnotations(), &deployment.Spec.DynamoComponentDeploymentSharedSpec) propagateDGDAnnotations(parentDGD.GetAnnotations(), &deployment.Spec.DynamoComponentDeploymentSharedSpec)
propagateDGDSpecMetadata(parentDGD.Spec.Annotations, parentDGD.Spec.Labels, &deployment.Spec.DynamoComponentDeploymentSharedSpec)
// Apply restart annotation if this service should be restarted. // Apply restart annotation if this service should be restarted.
if restartState.ShouldAnnotateService(componentName) { if restartState.ShouldAnnotateService(componentName) {
...@@ -1295,6 +1297,7 @@ func GeneratePodSpecForComponent( ...@@ -1295,6 +1297,7 @@ func GeneratePodSpecForComponent(
} }
propagateDGDAnnotations(dynamoDeployment.GetAnnotations(), component) propagateDGDAnnotations(dynamoDeployment.GetAnnotations(), component)
propagateDGDSpecMetadata(dynamoDeployment.Spec.Annotations, dynamoDeployment.Spec.Labels, component)
podSpec, err := GenerateBasePodSpec(component, backendFramework, secretsRetriever, dynamoDeployment.Name, dynamoDeployment.Namespace, role, numberOfNodes, operatorConfig, multinodeDeploymentType, serviceName, checkpointInfo) podSpec, err := GenerateBasePodSpec(component, backendFramework, secretsRetriever, dynamoDeployment.Name, dynamoDeployment.Namespace, role, numberOfNodes, operatorConfig, multinodeDeploymentType, serviceName, checkpointInfo)
if err != nil { if err != nil {
...@@ -1329,6 +1332,27 @@ func propagateDGDAnnotations(dgdAnnotations map[string]string, component *v1alph ...@@ -1329,6 +1332,27 @@ func propagateDGDAnnotations(dgdAnnotations map[string]string, component *v1alph
} }
} }
// propagateDGDSpecMetadata merges DGD spec-level annotations and labels into
// the component as a low-priority base. Service-level values take precedence.
func propagateDGDSpecMetadata(annotations, labels map[string]string, component *v1alpha1.DynamoComponentDeploymentSharedSpec) {
for k, v := range annotations {
if component.Annotations == nil {
component.Annotations = make(map[string]string)
}
if _, exists := component.Annotations[k]; !exists {
component.Annotations[k] = v
}
}
for k, v := range labels {
if component.Labels == nil {
component.Labels = make(map[string]string)
}
if _, exists := component.Labels[k]; !exists {
component.Labels[k] = v
}
}
}
// GenerateGrovePodCliqueSet generates a Grove PodCliqueSet for the given deployment, supporting both single-node and multinode cases. // GenerateGrovePodCliqueSet generates a Grove PodCliqueSet for the given deployment, supporting both single-node and multinode cases.
func GenerateGrovePodCliqueSet( func GenerateGrovePodCliqueSet(
ctx context.Context, ctx context.Context,
...@@ -1343,6 +1367,8 @@ func GenerateGrovePodCliqueSet( ...@@ -1343,6 +1367,8 @@ func GenerateGrovePodCliqueSet(
gangSet := &grovev1alpha1.PodCliqueSet{} gangSet := &grovev1alpha1.PodCliqueSet{}
gangSet.Name = dynamoDeployment.Name gangSet.Name = dynamoDeployment.Name
gangSet.Namespace = dynamoDeployment.Namespace gangSet.Namespace = dynamoDeployment.Namespace
gangSet.Labels = maps.Clone(dynamoDeployment.Spec.Labels)
gangSet.Annotations = maps.Clone(dynamoDeployment.Spec.Annotations)
gangSet.Spec.Replicas = 1 gangSet.Spec.Replicas = 1
gangSet.Spec.Template.HeadlessServiceConfig = &grovev1alpha1.HeadlessServiceConfig{ gangSet.Spec.Template.HeadlessServiceConfig = &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
......
...@@ -7340,3 +7340,143 @@ func TestPropagateDGDAnnotations(t *testing.T) { ...@@ -7340,3 +7340,143 @@ func TestPropagateDGDAnnotations(t *testing.T) {
}) })
} }
} }
func TestPropagateDGDSpecMetadata(t *testing.T) {
tests := []struct {
name string
dgdAnnotations map[string]string
dgdLabels map[string]string
serviceAnnotations map[string]string
serviceLabels map[string]string
expectedAnnotations map[string]string
expectedLabels map[string]string
}{
{
name: "nil metadata is a no-op",
dgdAnnotations: nil,
dgdLabels: nil,
serviceAnnotations: map[string]string{"existing": "value"},
expectedAnnotations: map[string]string{"existing": "value"},
expectedLabels: nil,
},
{
name: "annotations and labels propagate to empty component",
dgdAnnotations: map[string]string{"team/cost-center": "abc"},
dgdLabels: map[string]string{"env": "prod"},
expectedAnnotations: map[string]string{"team/cost-center": "abc"},
expectedLabels: map[string]string{"env": "prod"},
},
{
name: "service-level annotations take precedence",
dgdAnnotations: map[string]string{"shared": "from-dgd", "dgd-only": "val"},
serviceAnnotations: map[string]string{"shared": "from-service"},
expectedAnnotations: map[string]string{"shared": "from-service", "dgd-only": "val"},
},
{
name: "service-level labels take precedence",
dgdLabels: map[string]string{"shared": "from-dgd", "dgd-only": "val"},
serviceLabels: map[string]string{"shared": "from-service"},
expectedLabels: map[string]string{"shared": "from-service", "dgd-only": "val"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
component := &v1alpha1.DynamoComponentDeploymentSharedSpec{
Annotations: tt.serviceAnnotations,
Labels: tt.serviceLabels,
}
propagateDGDSpecMetadata(tt.dgdAnnotations, tt.dgdLabels, component)
if tt.expectedAnnotations == nil {
assert.True(t, len(component.Annotations) == 0 || component.Annotations == nil,
"expected no annotations, got %v", component.Annotations)
} else {
assert.Equal(t, tt.expectedAnnotations, component.Annotations)
}
if tt.expectedLabels == nil {
assert.True(t, len(component.Labels) == 0 || component.Labels == nil,
"expected no labels, got %v", component.Labels)
} else {
assert.Equal(t, tt.expectedLabels, component.Labels)
}
})
}
}
func TestGenerateGrovePodCliqueSet_SpecMetadataPropagation(t *testing.T) {
dgd := &v1alpha1.DynamoGraphDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-dgd",
Namespace: "ns",
},
Spec: v1alpha1.DynamoGraphDeploymentSpec{
Annotations: map[string]string{"team/cost-center": "abc"},
Labels: map[string]string{"env": "prod"},
Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{
"worker": {
ComponentType: commonconsts.ComponentTypeWorker,
Replicas: ptr.To(int32(1)),
Annotations: map[string]string{"team/cost-center": "svc-override"},
},
},
},
}
pcs, err := GenerateGrovePodCliqueSet(context.Background(), dgd, &configv1alpha1.OperatorConfiguration{}, &controller_common.RuntimeConfig{}, nil, nil, nil, nil)
require.NoError(t, err)
// PCS object-level metadata
assert.Equal(t, "abc", pcs.Annotations["team/cost-center"])
assert.Equal(t, "prod", pcs.Labels["env"])
// Clique-level: service annotation takes precedence
require.Len(t, pcs.Spec.Template.Cliques, 1)
clique := pcs.Spec.Template.Cliques[0]
assert.Equal(t, "svc-override", clique.Annotations["team/cost-center"],
"service-level annotation should take precedence over spec.metadata")
}
func TestGenerateDynamoComponentsDeployments_SpecMetadataPropagation(t *testing.T) {
dgd := &v1alpha1.DynamoGraphDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: "test-dgd",
Namespace: "ns",
},
Spec: v1alpha1.DynamoGraphDeploymentSpec{
Annotations: map[string]string{"team/cost-center": "abc", "shared": "dgd"},
Labels: map[string]string{"env": "prod", "shared-label": "dgd"},
Services: map[string]*v1alpha1.DynamoComponentDeploymentSharedSpec{
"frontend": {
ComponentType: commonconsts.ComponentTypeFrontend,
Replicas: ptr.To(int32(1)),
Annotations: map[string]string{"shared": "svc"},
Labels: map[string]string{"shared-label": "svc", "svc-only": "val"},
},
},
},
}
dcds, err := GenerateDynamoComponentsDeployments(context.Background(), dgd, &v1alpha1.IngressSpec{}, nil, nil, RollingUpdateContext{})
require.NoError(t, err)
dcd := dcds["frontend"]
require.NotNil(t, dcd)
// Annotations: service-level takes precedence over DGD-level
assert.Equal(t, "abc", dcd.Spec.Annotations["team/cost-center"])
assert.Equal(t, "svc", dcd.Spec.Annotations["shared"],
"service-level annotation should take precedence over DGD annotation")
// Labels: service-level survives and takes precedence over DGD-level
assert.Equal(t, "svc", dcd.Spec.Labels["shared-label"],
"service-level label should take precedence over DGD label")
assert.Equal(t, "val", dcd.Spec.Labels["svc-only"],
"service-only label should be preserved")
assert.Equal(t, "prod", dcd.Spec.Labels["env"],
"DGD-level label should propagate when no service override")
// Controller labels must always be present
assert.Equal(t, "frontend", dcd.Spec.Labels[commonconsts.KubeLabelDynamoComponent])
assert.Equal(t, dgd.Name, dcd.Spec.Labels[commonconsts.KubeLabelDynamoGraphDeploymentName])
}
...@@ -634,6 +634,8 @@ _Appears in:_ ...@@ -634,6 +634,8 @@ _Appears in:_
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| `annotations` _object (keys:string, values:string)_ | Annotations to propagate to all child resources (PCS, DCD, Deployments, and pod templates).<br />Service-level annotations take precedence over these values. | | Optional: \{\} <br /> |
| `labels` _object (keys:string, values:string)_ | Labels to propagate to all child resources (PCS, DCD, Deployments, and pod templates).<br />Service-level labels take precedence over these values. | | Optional: \{\} <br /> |
| `pvcs` _[PVC](#pvc) array_ | PVCs defines a list of persistent volume claims that can be referenced by components.<br />Each PVC must have a unique name that can be referenced in component specifications. | | MaxItems: 100 <br />Optional: \{\} <br /> | | `pvcs` _[PVC](#pvc) array_ | PVCs defines a list of persistent volume claims that can be referenced by components.<br />Each PVC must have a unique name that can be referenced in component specifications. | | MaxItems: 100 <br />Optional: \{\} <br /> |
| `services` _object (keys:string, values:[DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec))_ | Services are the services to deploy as part of this deployment. | | MaxProperties: 25 <br />Optional: \{\} <br /> | | `services` _object (keys:string, values:[DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec))_ | Services are the services to deploy as part of this deployment. | | MaxProperties: 25 <br />Optional: \{\} <br /> |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs are environment variables applied to all services in the deployment unless<br />overridden by service-specific configuration. | | Optional: \{\} <br /> | | `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs are environment variables applied to all services in the deployment unless<br />overridden by service-specific configuration. | | Optional: \{\} <br /> |
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment