Unverified Commit 94876e58 authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

Fix: Canonicalize PodGangSet to prevent spurious updates (#2469)

parent 78c7e352
...@@ -13,6 +13,7 @@ require ( ...@@ -13,6 +13,7 @@ require (
github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0 github.com/onsi/gomega v1.37.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/stretchr/testify v1.10.0
go.etcd.io/etcd/client/v3 v3.5.21 go.etcd.io/etcd/client/v3 v3.5.21
istio.io/api v1.23.1 istio.io/api v1.23.1
istio.io/client-go v1.23.1 istio.io/client-go v1.23.1
...@@ -57,6 +58,7 @@ require ( ...@@ -57,6 +58,7 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_golang v1.22.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/common v0.62.0 // indirect
......
...@@ -1217,7 +1217,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex ...@@ -1217,7 +1217,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
podLabels[commonconsts.KubeLabelDynamoSelector] = kubeName podLabels[commonconsts.KubeLabelDynamoSelector] = kubeName
podSpec := &basePodSpec podSpec := basePodSpec
podSpec.Containers = containers podSpec.Containers = containers
extraPodMetadata := opt.dynamoComponentDeployment.Spec.ExtraPodMetadata extraPodMetadata := opt.dynamoComponentDeployment.Spec.ExtraPodMetadata
......
...@@ -796,6 +796,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -796,6 +796,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
}, },
}, },
}, },
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -895,6 +896,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -895,6 +896,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
}, },
}, },
}, },
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
......
package controller_common
import (
"sort"
corev1 "k8s.io/api/core/v1"
)
// CanonicalizePodSpec sorts the pod spec in a way that is deterministic and easy to reason about.
//
//nolint:gocyclo
func CanonicalizePodSpec(podSpec *corev1.PodSpec) *corev1.PodSpec {
// Helper function to get EnvFromSource sort key
envFromKey := func(e corev1.EnvFromSource) string {
if e.ConfigMapRef != nil {
return "cm:" + e.ConfigMapRef.Name + ":" + e.Prefix
}
if e.SecretRef != nil {
return "sec:" + e.SecretRef.Name + ":" + e.Prefix
}
return "other:" + e.Prefix
}
// Helper function to sort container-like fields (works for both Container and EphemeralContainer)
sortContainerFields := func(env []corev1.EnvVar, envFrom []corev1.EnvFromSource, ports []corev1.ContainerPort, volumeMounts []corev1.VolumeMount, securityContext *corev1.SecurityContext) {
// Sort env vars by name
if len(env) > 1 {
sort.Slice(env, func(i, j int) bool { return env[i].Name < env[j].Name })
}
// Sort envFrom by referenced source and prefix
if len(envFrom) > 1 {
sort.Slice(envFrom, func(i, j int) bool {
return envFromKey(envFrom[i]) < envFromKey(envFrom[j])
})
}
// Sort ports by name then port number
if len(ports) > 1 {
sort.Slice(ports, func(i, j int) bool {
if ports[i].Name == ports[j].Name {
return ports[i].ContainerPort < ports[j].ContainerPort
}
return ports[i].Name < ports[j].Name
})
}
// Sort volume mounts by name then mount path
if len(volumeMounts) > 1 {
sort.Slice(volumeMounts, func(i, j int) bool {
if volumeMounts[i].Name == volumeMounts[j].Name {
return volumeMounts[i].MountPath < volumeMounts[j].MountPath
}
return volumeMounts[i].Name < volumeMounts[j].Name
})
}
// Sort security context capability lists
if securityContext != nil && securityContext.Capabilities != nil {
if caps := securityContext.Capabilities.Add; len(caps) > 1 {
sort.Slice(caps, func(i, j int) bool { return string(caps[i]) < string(caps[j]) })
}
if caps := securityContext.Capabilities.Drop; len(caps) > 1 {
sort.Slice(caps, func(i, j int) bool { return string(caps[i]) < string(caps[j]) })
}
}
}
// Sort regular containers
for i := range podSpec.Containers {
c := &podSpec.Containers[i]
sortContainerFields(c.Env, c.EnvFrom, c.Ports, c.VolumeMounts, c.SecurityContext)
}
if len(podSpec.Containers) > 1 {
sort.Slice(podSpec.Containers, func(i, j int) bool {
return podSpec.Containers[i].Name < podSpec.Containers[j].Name
})
}
// Sort init containers
for i := range podSpec.InitContainers {
c := &podSpec.InitContainers[i]
sortContainerFields(c.Env, c.EnvFrom, c.Ports, c.VolumeMounts, c.SecurityContext)
}
if len(podSpec.InitContainers) > 1 {
sort.Slice(podSpec.InitContainers, func(i, j int) bool {
return podSpec.InitContainers[i].Name < podSpec.InitContainers[j].Name
})
}
// Sort ephemeral containers
for i := range podSpec.EphemeralContainers {
ec := &podSpec.EphemeralContainers[i]
sortContainerFields(ec.Env, ec.EnvFrom, ec.Ports, ec.VolumeMounts, ec.SecurityContext)
}
if len(podSpec.EphemeralContainers) > 1 {
sort.Slice(podSpec.EphemeralContainers, func(i, j int) bool {
return podSpec.EphemeralContainers[i].Name < podSpec.EphemeralContainers[j].Name
})
}
// Sort image pull secrets
if len(podSpec.ImagePullSecrets) > 1 {
sort.Slice(podSpec.ImagePullSecrets, func(i, j int) bool {
return podSpec.ImagePullSecrets[i].Name < podSpec.ImagePullSecrets[j].Name
})
}
// Sort volumes and their nested items
sortKeyToPathItems := func(items []corev1.KeyToPath) {
if len(items) > 1 {
sort.Slice(items, func(i, j int) bool {
if items[i].Key == items[j].Key {
return items[i].Path < items[j].Path
}
return items[i].Key < items[j].Key
})
}
}
for i := range podSpec.Volumes {
v := &podSpec.Volumes[i]
// ConfigMap items
if v.ConfigMap != nil {
sortKeyToPathItems(v.ConfigMap.Items)
}
// Secret items
if v.Secret != nil {
sortKeyToPathItems(v.Secret.Items)
}
// DownwardAPI items
if v.DownwardAPI != nil && len(v.DownwardAPI.Items) > 1 {
sort.Slice(v.DownwardAPI.Items, func(i, j int) bool {
return v.DownwardAPI.Items[i].Path < v.DownwardAPI.Items[j].Path
})
}
// Projected sources
if v.Projected != nil {
// Sort projected sources
if len(v.Projected.Sources) > 1 {
sort.Slice(v.Projected.Sources, func(i, j int) bool {
getProjectionKey := func(p corev1.VolumeProjection) string {
if p.ConfigMap != nil {
return "cm:" + p.ConfigMap.Name
}
if p.Secret != nil {
return "sec:" + p.Secret.Name
}
if p.DownwardAPI != nil {
return "downward:"
}
if p.ServiceAccountToken != nil {
return "sat:" + p.ServiceAccountToken.Audience
}
return "z:other"
}
return getProjectionKey(v.Projected.Sources[i]) < getProjectionKey(v.Projected.Sources[j])
})
}
// Sort nested items for each projection
for j := range v.Projected.Sources {
p := &v.Projected.Sources[j]
if p.ConfigMap != nil {
sortKeyToPathItems(p.ConfigMap.Items)
}
if p.Secret != nil {
sortKeyToPathItems(p.Secret.Items)
}
if p.DownwardAPI != nil && len(p.DownwardAPI.Items) > 1 {
sort.Slice(p.DownwardAPI.Items, func(i, j int) bool {
return p.DownwardAPI.Items[i].Path < p.DownwardAPI.Items[j].Path
})
}
}
}
}
// Sort volumes by name
if len(podSpec.Volumes) > 1 {
sort.Slice(podSpec.Volumes, func(i, j int) bool {
return podSpec.Volumes[i].Name < podSpec.Volumes[j].Name
})
}
// Sort tolerations
if len(podSpec.Tolerations) > 1 {
sort.Slice(podSpec.Tolerations, func(i, j int) bool {
a, b := podSpec.Tolerations[i], podSpec.Tolerations[j]
if a.Key != b.Key {
return a.Key < b.Key
}
if string(a.Operator) != string(b.Operator) {
return string(a.Operator) < string(b.Operator)
}
if a.Value != b.Value {
return a.Value < b.Value
}
if string(a.Effect) != string(b.Effect) {
return string(a.Effect) < string(b.Effect)
}
// Handle TolerationSeconds (could be nil)
aSec, bSec := int64(0), int64(0)
if a.TolerationSeconds != nil {
aSec = *a.TolerationSeconds
}
if b.TolerationSeconds != nil {
bSec = *b.TolerationSeconds
}
return aSec < bSec
})
}
// Sort topology spread constraints
if len(podSpec.TopologySpreadConstraints) > 1 {
sort.Slice(podSpec.TopologySpreadConstraints, func(i, j int) bool {
a, b := podSpec.TopologySpreadConstraints[i], podSpec.TopologySpreadConstraints[j]
if a.TopologyKey != b.TopologyKey {
return a.TopologyKey < b.TopologyKey
}
if string(a.WhenUnsatisfiable) != string(b.WhenUnsatisfiable) {
return string(a.WhenUnsatisfiable) < string(b.WhenUnsatisfiable)
}
return a.MaxSkew < b.MaxSkew
})
}
// Sort host aliases
if len(podSpec.HostAliases) > 1 {
// First sort hostnames within each alias
for i := range podSpec.HostAliases {
if len(podSpec.HostAliases[i].Hostnames) > 1 {
sort.Strings(podSpec.HostAliases[i].Hostnames)
}
}
// Then sort aliases by IP
sort.Slice(podSpec.HostAliases, func(i, j int) bool {
return podSpec.HostAliases[i].IP < podSpec.HostAliases[j].IP
})
}
// Sort DNS config
if podSpec.DNSConfig != nil {
// Sort DNS options
if len(podSpec.DNSConfig.Options) > 1 {
sort.Slice(podSpec.DNSConfig.Options, func(i, j int) bool {
if podSpec.DNSConfig.Options[i].Name == podSpec.DNSConfig.Options[j].Name {
vi, vj := "", ""
if podSpec.DNSConfig.Options[i].Value != nil {
vi = *podSpec.DNSConfig.Options[i].Value
}
if podSpec.DNSConfig.Options[j].Value != nil {
vj = *podSpec.DNSConfig.Options[j].Value
}
return vi < vj
}
return podSpec.DNSConfig.Options[i].Name < podSpec.DNSConfig.Options[j].Name
})
}
// Sort nameservers and search domains
if len(podSpec.DNSConfig.Nameservers) > 1 {
sort.Strings(podSpec.DNSConfig.Nameservers)
}
if len(podSpec.DNSConfig.Searches) > 1 {
sort.Strings(podSpec.DNSConfig.Searches)
}
}
return podSpec
}
package controller_common
import (
"testing"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
)
func TestCanonicalizePodSpec(t *testing.T) {
tests := []struct {
name string
input *corev1.PodSpec
expected *corev1.PodSpec
}{
{
name: "sorts containers by name",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{Name: "zebra"},
{Name: "alpha"},
{Name: "beta"},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{Name: "alpha"},
{Name: "beta"},
{Name: "zebra"},
},
},
},
{
name: "sorts init containers by name",
input: &corev1.PodSpec{
InitContainers: []corev1.Container{
{Name: "init-zebra"},
{Name: "init-alpha"},
},
},
expected: &corev1.PodSpec{
InitContainers: []corev1.Container{
{Name: "init-alpha"},
{Name: "init-zebra"},
},
},
},
{
name: "sorts ephemeral containers by name",
input: &corev1.PodSpec{
EphemeralContainers: []corev1.EphemeralContainer{
{EphemeralContainerCommon: corev1.EphemeralContainerCommon{Name: "debug-zebra"}},
{EphemeralContainerCommon: corev1.EphemeralContainerCommon{Name: "debug-alpha"}},
},
},
expected: &corev1.PodSpec{
EphemeralContainers: []corev1.EphemeralContainer{
{EphemeralContainerCommon: corev1.EphemeralContainerCommon{Name: "debug-alpha"}},
{EphemeralContainerCommon: corev1.EphemeralContainerCommon{Name: "debug-zebra"}},
},
},
},
{
name: "sorts environment variables by name",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
Env: []corev1.EnvVar{
{Name: "ZOO", Value: "zebra"},
{Name: "ALPHA", Value: "apple"},
{Name: "BETA", Value: "banana"},
},
},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
Env: []corev1.EnvVar{
{Name: "ALPHA", Value: "apple"},
{Name: "BETA", Value: "banana"},
{Name: "ZOO", Value: "zebra"},
},
},
},
},
},
{
name: "sorts envFrom by source type and name",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
EnvFrom: []corev1.EnvFromSource{
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-z"}}},
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"}}},
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-a"}}},
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-z"}}},
},
},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
EnvFrom: []corev1.EnvFromSource{
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"}}},
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-z"}}},
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-a"}}},
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-z"}}},
},
},
},
},
},
{
name: "sorts container ports by name then port number",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
Ports: []corev1.ContainerPort{
{Name: "http", ContainerPort: 8080},
{Name: "grpc", ContainerPort: 9090},
{Name: "grpc", ContainerPort: 8080},
{Name: "debug", ContainerPort: 8080},
},
},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
Ports: []corev1.ContainerPort{
{Name: "debug", ContainerPort: 8080},
{Name: "grpc", ContainerPort: 8080},
{Name: "grpc", ContainerPort: 9090},
{Name: "http", ContainerPort: 8080},
},
},
},
},
},
{
name: "sorts volume mounts by name then mount path",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
VolumeMounts: []corev1.VolumeMount{
{Name: "vol1", MountPath: "/data2"},
{Name: "vol2", MountPath: "/data1"},
{Name: "vol1", MountPath: "/data1"},
},
},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
VolumeMounts: []corev1.VolumeMount{
{Name: "vol1", MountPath: "/data1"},
{Name: "vol1", MountPath: "/data2"},
{Name: "vol2", MountPath: "/data1"},
},
},
},
},
},
{
name: "sorts security context capabilities",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
SecurityContext: &corev1.SecurityContext{
Capabilities: &corev1.Capabilities{
Add: []corev1.Capability{"SYS_ADMIN", "NET_ADMIN", "CHOWN"},
Drop: []corev1.Capability{"ALL", "SETUID", "KILL"},
},
},
},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
SecurityContext: &corev1.SecurityContext{
Capabilities: &corev1.Capabilities{
Add: []corev1.Capability{"CHOWN", "NET_ADMIN", "SYS_ADMIN"},
Drop: []corev1.Capability{"ALL", "KILL", "SETUID"},
},
},
},
},
},
},
{
name: "sorts image pull secrets by name",
input: &corev1.PodSpec{
ImagePullSecrets: []corev1.LocalObjectReference{
{Name: "registry-z"},
{Name: "registry-a"},
{Name: "registry-b"},
},
},
expected: &corev1.PodSpec{
ImagePullSecrets: []corev1.LocalObjectReference{
{Name: "registry-a"},
{Name: "registry-b"},
{Name: "registry-z"},
},
},
},
{
name: "sorts volumes by name",
input: &corev1.PodSpec{
Volumes: []corev1.Volume{
{Name: "vol-z"},
{Name: "vol-a"},
{Name: "vol-b"},
},
},
expected: &corev1.PodSpec{
Volumes: []corev1.Volume{
{Name: "vol-a"},
{Name: "vol-b"},
{Name: "vol-z"},
},
},
},
{
name: "sorts configmap volume items by key then path",
input: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "config",
VolumeSource: corev1.VolumeSource{
ConfigMap: &corev1.ConfigMapVolumeSource{
Items: []corev1.KeyToPath{
{Key: "app.conf", Path: "config/app.conf"},
{Key: "db.conf", Path: "config/db.conf"},
{Key: "app.conf", Path: "backup/app.conf"},
},
},
},
},
},
},
expected: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "config",
VolumeSource: corev1.VolumeSource{
ConfigMap: &corev1.ConfigMapVolumeSource{
Items: []corev1.KeyToPath{
{Key: "app.conf", Path: "backup/app.conf"},
{Key: "app.conf", Path: "config/app.conf"},
{Key: "db.conf", Path: "config/db.conf"},
},
},
},
},
},
},
},
{
name: "sorts secret volume items by key then path",
input: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "secret",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
Items: []corev1.KeyToPath{
{Key: "tls.key", Path: "tls/server.key"},
{Key: "tls.crt", Path: "tls/server.crt"},
{Key: "tls.key", Path: "backup/server.key"},
},
},
},
},
},
},
expected: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "secret",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
Items: []corev1.KeyToPath{
{Key: "tls.crt", Path: "tls/server.crt"},
{Key: "tls.key", Path: "backup/server.key"},
{Key: "tls.key", Path: "tls/server.key"},
},
},
},
},
},
},
},
{
name: "sorts downward API items by path",
input: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "downward",
VolumeSource: corev1.VolumeSource{
DownwardAPI: &corev1.DownwardAPIVolumeSource{
Items: []corev1.DownwardAPIVolumeFile{
{Path: "metadata/name"},
{Path: "metadata/annotations"},
{Path: "limits/cpu"},
},
},
},
},
},
},
expected: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "downward",
VolumeSource: corev1.VolumeSource{
DownwardAPI: &corev1.DownwardAPIVolumeSource{
Items: []corev1.DownwardAPIVolumeFile{
{Path: "limits/cpu"},
{Path: "metadata/annotations"},
{Path: "metadata/name"},
},
},
},
},
},
},
},
{
name: "sorts projected volume sources and their items",
input: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "projected",
VolumeSource: corev1.VolumeSource{
Projected: &corev1.ProjectedVolumeSource{
Sources: []corev1.VolumeProjection{
{
Secret: &corev1.SecretProjection{
LocalObjectReference: corev1.LocalObjectReference{Name: "secret-z"},
Items: []corev1.KeyToPath{
{Key: "password", Path: "auth/password"},
{Key: "username", Path: "auth/username"},
},
},
},
{
ConfigMap: &corev1.ConfigMapProjection{
LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"},
Items: []corev1.KeyToPath{
{Key: "db.conf", Path: "config/db.conf"},
{Key: "app.conf", Path: "config/app.conf"},
},
},
},
{
DownwardAPI: &corev1.DownwardAPIProjection{
Items: []corev1.DownwardAPIVolumeFile{
{Path: "metadata/name"},
{Path: "limits/cpu"},
},
},
},
{
ServiceAccountToken: &corev1.ServiceAccountTokenProjection{
Audience: "api.example.com",
Path: "tokens/api",
},
},
},
},
},
},
},
},
expected: &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "projected",
VolumeSource: corev1.VolumeSource{
Projected: &corev1.ProjectedVolumeSource{
Sources: []corev1.VolumeProjection{
{
ConfigMap: &corev1.ConfigMapProjection{
LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"},
Items: []corev1.KeyToPath{
{Key: "app.conf", Path: "config/app.conf"},
{Key: "db.conf", Path: "config/db.conf"},
},
},
},
{
DownwardAPI: &corev1.DownwardAPIProjection{
Items: []corev1.DownwardAPIVolumeFile{
{Path: "limits/cpu"},
{Path: "metadata/name"},
},
},
},
{
ServiceAccountToken: &corev1.ServiceAccountTokenProjection{
Audience: "api.example.com",
Path: "tokens/api",
},
},
{
Secret: &corev1.SecretProjection{
LocalObjectReference: corev1.LocalObjectReference{Name: "secret-z"},
Items: []corev1.KeyToPath{
{Key: "password", Path: "auth/password"},
{Key: "username", Path: "auth/username"},
},
},
},
},
},
},
},
},
},
},
{
name: "sorts tolerations by key, operator, value, effect, seconds",
input: &corev1.PodSpec{
Tolerations: []corev1.Toleration{
{
Key: "node-type",
Operator: corev1.TolerationOpEqual,
Value: "gpu",
Effect: corev1.TaintEffectNoSchedule,
},
{
Key: "node-role",
Operator: corev1.TolerationOpEqual,
Value: "master",
Effect: corev1.TaintEffectNoSchedule,
},
{
Key: "node-role",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoSchedule,
},
},
},
expected: &corev1.PodSpec{
Tolerations: []corev1.Toleration{
{
Key: "node-role",
Operator: corev1.TolerationOpEqual,
Value: "master",
Effect: corev1.TaintEffectNoSchedule,
},
{
Key: "node-role",
Operator: corev1.TolerationOpExists,
Effect: corev1.TaintEffectNoSchedule,
},
{
Key: "node-type",
Operator: corev1.TolerationOpEqual,
Value: "gpu",
Effect: corev1.TaintEffectNoSchedule,
},
},
},
},
{
name: "sorts topology spread constraints by topology key, when unsatisfiable, max skew",
input: &corev1.PodSpec{
TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
{
TopologyKey: "kubernetes.io/zone",
WhenUnsatisfiable: corev1.DoNotSchedule,
MaxSkew: 2,
},
{
TopologyKey: "kubernetes.io/hostname",
WhenUnsatisfiable: corev1.DoNotSchedule,
MaxSkew: 1,
},
{
TopologyKey: "kubernetes.io/hostname",
WhenUnsatisfiable: corev1.ScheduleAnyway,
MaxSkew: 1,
},
},
},
expected: &corev1.PodSpec{
TopologySpreadConstraints: []corev1.TopologySpreadConstraint{
{
TopologyKey: "kubernetes.io/hostname",
WhenUnsatisfiable: corev1.DoNotSchedule,
MaxSkew: 1,
},
{
TopologyKey: "kubernetes.io/hostname",
WhenUnsatisfiable: corev1.ScheduleAnyway,
MaxSkew: 1,
},
{
TopologyKey: "kubernetes.io/zone",
WhenUnsatisfiable: corev1.DoNotSchedule,
MaxSkew: 2,
},
},
},
},
{
name: "sorts host aliases by IP and hostnames within each alias",
input: &corev1.PodSpec{
HostAliases: []corev1.HostAlias{
{
IP: "192.168.1.2",
Hostnames: []string{"web2.example.com", "api2.example.com"},
},
{
IP: "192.168.1.1",
Hostnames: []string{"web1.example.com", "api1.example.com", "admin1.example.com"},
},
},
},
expected: &corev1.PodSpec{
HostAliases: []corev1.HostAlias{
{
IP: "192.168.1.1",
Hostnames: []string{"admin1.example.com", "api1.example.com", "web1.example.com"},
},
{
IP: "192.168.1.2",
Hostnames: []string{"api2.example.com", "web2.example.com"},
},
},
},
},
{
name: "sorts DNS config options, nameservers, and searches",
input: &corev1.PodSpec{
DNSConfig: &corev1.PodDNSConfig{
Options: []corev1.PodDNSConfigOption{
{Name: "timeout", Value: func() *string { s := "5"; return &s }()},
{Name: "attempts", Value: func() *string { s := "3"; return &s }()},
{Name: "ndots", Value: func() *string { s := "2"; return &s }()},
},
Nameservers: []string{"8.8.8.8", "1.1.1.1", "8.8.4.4"},
Searches: []string{"example.com", "cluster.local", "app.local"},
},
},
expected: &corev1.PodSpec{
DNSConfig: &corev1.PodDNSConfig{
Options: []corev1.PodDNSConfigOption{
{Name: "attempts", Value: func() *string { s := "3"; return &s }()},
{Name: "ndots", Value: func() *string { s := "2"; return &s }()},
{Name: "timeout", Value: func() *string { s := "5"; return &s }()},
},
Nameservers: []string{"1.1.1.1", "8.8.4.4", "8.8.8.8"},
Searches: []string{"app.local", "cluster.local", "example.com"},
},
},
},
{
name: "handles nil pointer values gracefully",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
SecurityContext: &corev1.SecurityContext{
Capabilities: nil,
},
},
},
DNSConfig: &corev1.PodDNSConfig{
Options: []corev1.PodDNSConfigOption{
{Name: "timeout", Value: nil},
{Name: "attempts", Value: func() *string { s := "3"; return &s }()},
},
},
Tolerations: []corev1.Toleration{
{
Key: "test",
TolerationSeconds: nil,
},
{
Key: "test2",
TolerationSeconds: func() *int64 { s := int64(300); return &s }(),
},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
SecurityContext: &corev1.SecurityContext{
Capabilities: nil,
},
},
},
DNSConfig: &corev1.PodDNSConfig{
Options: []corev1.PodDNSConfigOption{
{Name: "attempts", Value: func() *string { s := "3"; return &s }()},
{Name: "timeout", Value: nil},
},
},
Tolerations: []corev1.Toleration{
{
Key: "test",
TolerationSeconds: nil,
},
{
Key: "test2",
TolerationSeconds: func() *int64 { s := int64(300); return &s }(),
},
},
},
},
{
name: "returns original podspec when already sorted",
input: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "alpha",
Env: []corev1.EnvVar{
{Name: "A", Value: "1"},
{Name: "B", Value: "2"},
},
},
{Name: "beta"},
},
ImagePullSecrets: []corev1.LocalObjectReference{
{Name: "secret-a"},
{Name: "secret-b"},
},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "alpha",
Env: []corev1.EnvVar{
{Name: "A", Value: "1"},
{Name: "B", Value: "2"},
},
},
{Name: "beta"},
},
ImagePullSecrets: []corev1.LocalObjectReference{
{Name: "secret-a"},
{Name: "secret-b"},
},
},
},
{
name: "handles empty slices gracefully",
input: &corev1.PodSpec{
Containers: []corev1.Container{},
InitContainers: []corev1.Container{},
ImagePullSecrets: []corev1.LocalObjectReference{},
Volumes: []corev1.Volume{},
Tolerations: []corev1.Toleration{},
},
expected: &corev1.PodSpec{
Containers: []corev1.Container{},
InitContainers: []corev1.Container{},
ImagePullSecrets: []corev1.LocalObjectReference{},
Volumes: []corev1.Volume{},
Tolerations: []corev1.Toleration{},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := CanonicalizePodSpec(tt.input)
// Verify the function returns the same instance
assert.Same(t, tt.input, result, "function should return the same PodSpec instance")
// Verify the sorting is correct
assert.Equal(t, tt.expected, result, "PodSpec should be sorted correctly")
})
}
}
func TestCanonicalizePodSpec_Idempotent(t *testing.T) {
// Create a complex, unsorted PodSpec
podSpec := &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "zebra",
Env: []corev1.EnvVar{
{Name: "Z_VAR", Value: "z"},
{Name: "A_VAR", Value: "a"},
},
Ports: []corev1.ContainerPort{
{Name: "http", ContainerPort: 8080},
{Name: "grpc", ContainerPort: 9090},
},
VolumeMounts: []corev1.VolumeMount{
{Name: "vol2", MountPath: "/data2"},
{Name: "vol1", MountPath: "/data1"},
},
},
{Name: "alpha"},
},
InitContainers: []corev1.Container{
{Name: "init-zebra"},
{Name: "init-alpha"},
},
ImagePullSecrets: []corev1.LocalObjectReference{
{Name: "secret-z"},
{Name: "secret-a"},
},
Volumes: []corev1.Volume{
{Name: "vol-z"},
{Name: "vol-a"},
},
Tolerations: []corev1.Toleration{
{Key: "node-z"},
{Key: "node-a"},
},
}
// First canonicalization
result1 := CanonicalizePodSpec(podSpec)
// Second canonicalization on the same object
result2 := CanonicalizePodSpec(result1)
// Should be identical after second canonicalization
assert.Equal(t, result1, result2, "CanonicalizePodSpec should be idempotent")
// Verify containers are sorted
assert.Equal(t, "alpha", result2.Containers[0].Name)
assert.Equal(t, "zebra", result2.Containers[1].Name)
// Verify env vars within containers are sorted
assert.Equal(t, "A_VAR", result2.Containers[1].Env[0].Name)
assert.Equal(t, "Z_VAR", result2.Containers[1].Env[1].Name)
// Verify ports are sorted
assert.Equal(t, "grpc", result2.Containers[1].Ports[0].Name)
assert.Equal(t, "http", result2.Containers[1].Ports[1].Name)
// Verify volume mounts are sorted
assert.Equal(t, "vol1", result2.Containers[1].VolumeMounts[0].Name)
assert.Equal(t, "vol2", result2.Containers[1].VolumeMounts[1].Name)
}
func TestCanonicalizePodSpec_EnvFromSortPriority(t *testing.T) {
podSpec := &corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
EnvFrom: []corev1.EnvFromSource{
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-b"}}},
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-b"}}},
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-a"}}},
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"}}},
// Test duplicate names for secondary sort
{ConfigMapRef: &corev1.ConfigMapEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"}}},
{SecretRef: &corev1.SecretEnvSource{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-a"}}},
},
},
},
}
result := CanonicalizePodSpec(podSpec)
// ConfigMaps should come before Secrets (cm: < sec:)
// Within each type, sorted by name
expected := []string{
"cm:config-a:", // ConfigMap config-a
"cm:config-a:", // ConfigMap config-a (duplicate)
"cm:config-b:", // ConfigMap config-b
"sec:secret-a:", // Secret secret-a
"sec:secret-a:", // Secret secret-a (duplicate)
"sec:secret-b:", // Secret secret-b
}
envFromKey := func(e corev1.EnvFromSource) string {
if e.ConfigMapRef != nil {
return "cm:" + e.ConfigMapRef.Name + ":"
}
if e.SecretRef != nil {
return "sec:" + e.SecretRef.Name + ":"
}
return "other:"
}
for i, envFrom := range result.Containers[0].EnvFrom {
assert.Equal(t, expected[i], envFromKey(envFrom), "EnvFrom at index %d should match expected sort order", i)
}
}
func TestCanonicalizePodSpec_TolerationSecondsHandling(t *testing.T) {
sec300 := int64(300)
sec600 := int64(600)
podSpec := &corev1.PodSpec{
Tolerations: []corev1.Toleration{
{Key: "key1", TolerationSeconds: &sec600},
{Key: "key1", TolerationSeconds: nil},
{Key: "key1", TolerationSeconds: &sec300},
},
}
result := CanonicalizePodSpec(podSpec)
// Should be sorted by TolerationSeconds: nil (0) < 300 < 600
assert.Nil(t, result.Tolerations[0].TolerationSeconds)
assert.Equal(t, int64(300), *result.Tolerations[1].TolerationSeconds)
assert.Equal(t, int64(600), *result.Tolerations[2].TolerationSeconds)
}
func TestCanonicalizePodSpec_ProjectedVolumeSourcePriority(t *testing.T) {
podSpec := &corev1.PodSpec{
Volumes: []corev1.Volume{
{
Name: "projected",
VolumeSource: corev1.VolumeSource{
Projected: &corev1.ProjectedVolumeSource{
Sources: []corev1.VolumeProjection{
{Secret: &corev1.SecretProjection{LocalObjectReference: corev1.LocalObjectReference{Name: "secret-a"}}},
{ServiceAccountToken: &corev1.ServiceAccountTokenProjection{Audience: "zz.example.com"}},
{DownwardAPI: &corev1.DownwardAPIProjection{}},
{ConfigMap: &corev1.ConfigMapProjection{LocalObjectReference: corev1.LocalObjectReference{Name: "config-z"}}},
{ServiceAccountToken: &corev1.ServiceAccountTokenProjection{Audience: "aa.example.com"}},
{ConfigMap: &corev1.ConfigMapProjection{LocalObjectReference: corev1.LocalObjectReference{Name: "config-a"}}},
},
},
},
},
},
}
result := CanonicalizePodSpec(podSpec)
// Expected sort order: cm: < downward: < sat: < sec:
// Within same type, sorted by name/audience
getProjectionKey := func(p corev1.VolumeProjection) string {
if p.ConfigMap != nil {
return "cm:" + p.ConfigMap.Name
}
if p.Secret != nil {
return "sec:" + p.Secret.Name
}
if p.DownwardAPI != nil {
return "downward:"
}
if p.ServiceAccountToken != nil {
return "sat:" + p.ServiceAccountToken.Audience
}
return "z:other"
}
expected := []string{
"cm:config-a",
"cm:config-z",
"downward:",
"sat:aa.example.com",
"sat:zz.example.com",
"sec:secret-a",
}
sources := result.Volumes[0].VolumeSource.Projected.Sources
for i, source := range sources {
assert.Equal(t, expected[i], getProjectionKey(source), "Projected source at index %d should match expected sort order", i)
}
}
package controller_common
import (
"sort"
grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
)
func CanonicalizePodGangSet(gangSet *grovev1alpha1.PodGangSet) *grovev1alpha1.PodGangSet {
// sort cliques by name
sort.Slice(gangSet.Spec.Template.Cliques, func(i, j int) bool {
return gangSet.Spec.Template.Cliques[i].Name < gangSet.Spec.Template.Cliques[j].Name
})
// sort scaling groups by name
sort.Slice(gangSet.Spec.Template.PodCliqueScalingGroupConfigs, func(i, j int) bool {
return gangSet.Spec.Template.PodCliqueScalingGroupConfigs[i].Name < gangSet.Spec.Template.PodCliqueScalingGroupConfigs[j].Name
})
return gangSet
}
...@@ -27,6 +27,7 @@ import ( ...@@ -27,6 +27,7 @@ import (
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/google/go-cmp/cmp"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
...@@ -156,6 +157,14 @@ func SyncResource[T client.Object](ctx context.Context, r Reconciler, parentReso ...@@ -156,6 +157,14 @@ func SyncResource[T client.Object](ctx context.Context, r Reconciler, parentReso
return false, resource, fmt.Errorf("failed to check if spec has changed: %w", err) return false, resource, fmt.Errorf("failed to check if spec has changed: %w", err)
} }
if newHash != nil { if newHash != nil {
// Generate and log diff before updating
diff, diffErr := generateSpecDiff(oldResource, resource)
if diffErr != nil {
logs.V(1).Info(fmt.Sprintf("Failed to generate diff for %s: %v", resourceType, diffErr))
} else if diff != "" {
logs.Info(fmt.Sprintf("%s spec changes detected", resourceType), "diff", diff)
}
// update the spec of the current object with the desired spec // update the spec of the current object with the desired spec
err = CopySpec(resource, oldResource) err = CopySpec(resource, oldResource)
if err != nil { if err != nil {
...@@ -252,6 +261,27 @@ func IsSpecChanged(current client.Object, desired client.Object) (*string, error ...@@ -252,6 +261,27 @@ func IsSpecChanged(current client.Object, desired client.Object) (*string, error
return &hashStr, nil return &hashStr, nil
} }
// generateSpecDiff creates a unified diff showing changes between old and new resource specs
func generateSpecDiff(oldResource, newResource client.Object) (string, error) {
oldSpec, err := getSpec(oldResource)
if err != nil {
return "", fmt.Errorf("failed to get old spec: %w", err)
}
newSpec, err := getSpec(newResource)
if err != nil {
return "", fmt.Errorf("failed to get new spec: %w", err)
}
// Generate diff using cmp
diff := cmp.Diff(oldSpec, newSpec)
if diff == "" {
return "", nil
}
return diff, nil
}
func GetSpecHash(obj client.Object) (string, error) { func GetSpecHash(obj client.Object) (string, error) {
spec, err := getSpec(obj) spec, err := getSpec(obj)
if err != nil { if err != nil {
......
...@@ -8,6 +8,7 @@ package dynamo ...@@ -8,6 +8,7 @@ package dynamo
import ( import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/utils/ptr"
) )
// ComponentDefaults interface defines how defaults should be provided // ComponentDefaults interface defines how defaults should be provided
...@@ -50,7 +51,14 @@ func (b *BaseComponentDefaults) GetBaseContainer(context ComponentContext) (core ...@@ -50,7 +51,14 @@ func (b *BaseComponentDefaults) GetBaseContainer(context ComponentContext) (core
} }
func (b *BaseComponentDefaults) GetBasePodSpec(context ComponentContext) (corev1.PodSpec, error) { func (b *BaseComponentDefaults) GetBasePodSpec(context ComponentContext) (corev1.PodSpec, error) {
return corev1.PodSpec{}, nil return b.getCommonPodSpec(), nil
}
func (b *BaseComponentDefaults) getCommonPodSpec() corev1.PodSpec {
return corev1.PodSpec{
TerminationGracePeriodSeconds: ptr.To(int64(60)),
RestartPolicy: corev1.RestartPolicyAlways,
}
} }
func (b *BaseComponentDefaults) getCommonContainer(context ComponentContext) corev1.Container { func (b *BaseComponentDefaults) getCommonContainer(context ComponentContext) corev1.Container {
......
...@@ -39,8 +39,7 @@ func (p *PlannerDefaults) GetBaseContainer(context ComponentContext) (corev1.Con ...@@ -39,8 +39,7 @@ func (p *PlannerDefaults) GetBaseContainer(context ComponentContext) (corev1.Con
} }
func (p *PlannerDefaults) GetBasePodSpec(context ComponentContext) (corev1.PodSpec, error) { func (p *PlannerDefaults) GetBasePodSpec(context ComponentContext) (corev1.PodSpec, error) {
podSpec := corev1.PodSpec{ podSpec := p.getCommonPodSpec()
ServiceAccountName: commonconsts.PlannerServiceAccountName, podSpec.ServiceAccountName = commonconsts.PlannerServiceAccountName
}
return podSpec, nil return podSpec, nil
} }
...@@ -31,6 +31,7 @@ import ( ...@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" grovev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
...@@ -687,13 +688,13 @@ func GenerateBasePodSpec( ...@@ -687,13 +688,13 @@ func GenerateBasePodSpec(
controllerConfig controller_common.Config, controllerConfig controller_common.Config,
multinodeDeploymentType commonconsts.MultinodeDeploymentType, multinodeDeploymentType commonconsts.MultinodeDeploymentType,
serviceName string, serviceName string,
) (corev1.PodSpec, error) { ) (*corev1.PodSpec, error) {
// Start with base container generated per component type // Start with base container generated per component type
componentContext := generateComponentContext(component, parentGraphDeploymentName, namespace, numberOfNodes) componentContext := generateComponentContext(component, parentGraphDeploymentName, namespace, numberOfNodes)
componentDefaults := ComponentDefaultsFactory(component.ComponentType) componentDefaults := ComponentDefaultsFactory(component.ComponentType)
container, err := componentDefaults.GetBaseContainer(componentContext) container, err := componentDefaults.GetBaseContainer(componentContext)
if err != nil { if err != nil {
return corev1.PodSpec{}, fmt.Errorf("failed to get base container: %w", err) return nil, fmt.Errorf("failed to get base container: %w", err)
} }
if component.ExtraPodSpec != nil && component.ExtraPodSpec.MainContainer != nil { if component.ExtraPodSpec != nil && component.ExtraPodSpec.MainContainer != nil {
...@@ -702,7 +703,7 @@ func GenerateBasePodSpec( ...@@ -702,7 +703,7 @@ func GenerateBasePodSpec(
// merge the extraPodSpec from the parent deployment with the extraPodSpec from the service // merge the extraPodSpec from the parent deployment with the extraPodSpec from the service
err = mergo.Merge(&container, *main, mergo.WithOverride) err = mergo.Merge(&container, *main, mergo.WithOverride)
if err != nil { if err != nil {
return corev1.PodSpec{}, fmt.Errorf("failed to merge extraPodSpec: %w", err) return nil, fmt.Errorf("failed to merge extraPodSpec: %w", err)
} }
// main container fields that require special handling // main container fields that require special handling
...@@ -725,7 +726,7 @@ func GenerateBasePodSpec( ...@@ -725,7 +726,7 @@ func GenerateBasePodSpec(
overrideResources, err := controller_common.GetResourcesConfig(component.Resources) overrideResources, err := controller_common.GetResourcesConfig(component.Resources)
if err != nil { if err != nil {
return corev1.PodSpec{}, fmt.Errorf("failed to get resources config: %w", err) return nil, fmt.Errorf("failed to get resources config: %w", err)
} }
// Requests // Requests
if overrideResources != nil && len(overrideResources.Requests) > 0 { if overrideResources != nil && len(overrideResources.Requests) > 0 {
...@@ -784,32 +785,32 @@ func GenerateBasePodSpec( ...@@ -784,32 +785,32 @@ func GenerateBasePodSpec(
// Apply backend-specific container modifications // Apply backend-specific container modifications
multinodeDeployer := MultinodeDeployerFactory(multinodeDeploymentType) multinodeDeployer := MultinodeDeployerFactory(multinodeDeploymentType)
if multinodeDeployer == nil { if multinodeDeployer == nil {
return corev1.PodSpec{}, fmt.Errorf("unsupported multinode deployment type: %s", multinodeDeploymentType) return nil, fmt.Errorf("unsupported multinode deployment type: %s", multinodeDeploymentType)
} }
backend := BackendFactory(backendFramework) backend := BackendFactory(backendFramework)
if backend == nil { if backend == nil {
return corev1.PodSpec{}, fmt.Errorf("unsupported backend framework: %s", backendFramework) return nil, fmt.Errorf("unsupported backend framework: %s", backendFramework)
} }
backend.UpdateContainer(&container, numberOfNodes, role, component, serviceName, multinodeDeployer) backend.UpdateContainer(&container, numberOfNodes, role, component, serviceName, multinodeDeployer)
// get base podspec from component // get base podspec from component
podSpec, err := componentDefaults.GetBasePodSpec(componentContext) podSpec, err := componentDefaults.GetBasePodSpec(componentContext)
if err != nil { if err != nil {
return corev1.PodSpec{}, fmt.Errorf("failed to get base podspec: %w", err) return nil, fmt.Errorf("failed to get base podspec: %w", err)
} }
if component.ExtraPodSpec != nil && component.ExtraPodSpec.PodSpec != nil { if component.ExtraPodSpec != nil && component.ExtraPodSpec.PodSpec != nil {
// merge extraPodSpec PodSpec with base podspec // merge extraPodSpec PodSpec with base podspec
err := mergo.Merge(&podSpec, component.ExtraPodSpec.PodSpec.DeepCopy(), mergo.WithOverride) err := mergo.Merge(&podSpec, component.ExtraPodSpec.PodSpec.DeepCopy(), mergo.WithOverride)
if err != nil { if err != nil {
return corev1.PodSpec{}, fmt.Errorf("failed to merge extraPodSpec: %w", err) return nil, fmt.Errorf("failed to merge extraPodSpec: %w", err)
} }
} }
podSpec.Containers = append(podSpec.Containers, container) podSpec.Containers = append(podSpec.Containers, container)
podSpec.Volumes = append(podSpec.Volumes, volumes...) podSpec.Volumes = append(podSpec.Volumes, volumes...)
podSpec.ImagePullSecrets = append(podSpec.ImagePullSecrets, imagePullSecrets...) podSpec.ImagePullSecrets = append(podSpec.ImagePullSecrets, imagePullSecrets...)
backend.UpdatePodSpec(&podSpec, numberOfNodes, role, component, serviceName) backend.UpdatePodSpec(&podSpec, numberOfNodes, role, component, serviceName)
return podSpec, nil return controller_common.CanonicalizePodSpec(&podSpec), nil
} }
func setMetricsLabels(labels map[string]string, dynamoGraphDeployment *v1alpha1.DynamoGraphDeployment) { func setMetricsLabels(labels map[string]string, dynamoGraphDeployment *v1alpha1.DynamoGraphDeployment) {
...@@ -846,13 +847,13 @@ func GeneratePodSpecForComponent( ...@@ -846,13 +847,13 @@ func GeneratePodSpecForComponent(
controllerConfig controller_common.Config, controllerConfig controller_common.Config,
multinodeDeploymentType commonconsts.MultinodeDeploymentType, multinodeDeploymentType commonconsts.MultinodeDeploymentType,
serviceName string, serviceName string,
) (corev1.PodSpec, error) { ) (*corev1.PodSpec, error) {
if len(dynamoDeployment.Spec.Envs) > 0 { if len(dynamoDeployment.Spec.Envs) > 0 {
component.Envs = MergeEnvs(dynamoDeployment.Spec.Envs, component.Envs) component.Envs = MergeEnvs(dynamoDeployment.Spec.Envs, component.Envs)
} }
podSpec, err := GenerateBasePodSpec(component, backendFramework, secretsRetriever, dynamoDeployment.Name, dynamoDeployment.Namespace, role, numberOfNodes, controllerConfig, multinodeDeploymentType, serviceName) podSpec, err := GenerateBasePodSpec(component, backendFramework, secretsRetriever, dynamoDeployment.Name, dynamoDeployment.Namespace, role, numberOfNodes, controllerConfig, multinodeDeploymentType, serviceName)
if err != nil { if err != nil {
return corev1.PodSpec{}, err return nil, err
} }
return podSpec, nil return podSpec, nil
} }
...@@ -871,6 +872,7 @@ func GenerateGrovePodGangSet( ...@@ -871,6 +872,7 @@ func GenerateGrovePodGangSet(
gangSet.Spec.Template.HeadlessServiceConfig = &grovev1alpha1.HeadlessServiceConfig{ gangSet.Spec.Template.HeadlessServiceConfig = &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
} }
gangSet.Spec.Template.StartupType = ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder)
if controllerConfig.Grove.TerminationDelay > 0 { if controllerConfig.Grove.TerminationDelay > 0 {
gangSet.Spec.Template.TerminationDelay = &metav1.Duration{Duration: controllerConfig.Grove.TerminationDelay} gangSet.Spec.Template.TerminationDelay = &metav1.Duration{Duration: controllerConfig.Grove.TerminationDelay}
} }
...@@ -911,9 +913,10 @@ func GenerateGrovePodGangSet( ...@@ -911,9 +913,10 @@ func GenerateGrovePodGangSet(
clique := &grovev1alpha1.PodCliqueTemplateSpec{ clique := &grovev1alpha1.PodCliqueTemplateSpec{
Name: strings.ToLower(r.Name), Name: strings.ToLower(r.Name),
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: strings.ToLower(r.Name), RoleName: strings.ToLower(r.Name),
Replicas: r.Replicas, Replicas: r.Replicas,
PodSpec: podSpec, MinAvailable: ptr.To(int32(1)),
PodSpec: *podSpec,
}, },
} }
labels, err := generateLabels(component, dynamoDeployment, r.Name) labels, err := generateLabels(component, dynamoDeployment, r.Name)
...@@ -935,9 +938,10 @@ func GenerateGrovePodGangSet( ...@@ -935,9 +938,10 @@ func GenerateGrovePodGangSet(
if isMultinode { if isMultinode {
scalingGroups = append(scalingGroups, grovev1alpha1.PodCliqueScalingGroupConfig{ scalingGroups = append(scalingGroups, grovev1alpha1.PodCliqueScalingGroupConfig{
Name: strings.ToLower(serviceName), Name: strings.ToLower(serviceName),
CliqueNames: cliqueNames, CliqueNames: cliqueNames,
Replicas: component.Replicas, Replicas: component.Replicas,
MinAvailable: ptr.To(int32(1)),
}) })
} }
} }
...@@ -945,7 +949,7 @@ func GenerateGrovePodGangSet( ...@@ -945,7 +949,7 @@ func GenerateGrovePodGangSet(
gangSet.Spec.Template.PodCliqueScalingGroupConfigs = scalingGroups gangSet.Spec.Template.PodCliqueScalingGroupConfigs = scalingGroups
} }
return gangSet, nil return controller_common.CanonicalizePodGangSet(gangSet), nil
} }
func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, dynamoDeployment *v1alpha1.DynamoGraphDeployment, componentName string) (map[string]string, error) { func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, dynamoDeployment *v1alpha1.DynamoGraphDeployment, componentName string) (map[string]string, error) {
...@@ -1141,7 +1145,7 @@ func GenerateBasePodSpecForController( ...@@ -1141,7 +1145,7 @@ func GenerateBasePodSpecForController(
controllerConfig controller_common.Config, controllerConfig controller_common.Config,
role Role, role Role,
multinodeDeploymentType commonconsts.MultinodeDeploymentType, multinodeDeploymentType commonconsts.MultinodeDeploymentType,
) (corev1.PodSpec, error) { ) (*corev1.PodSpec, error) {
// Convert to our interface // Convert to our interface
componentSpec := ConvertDynamoComponentDeploymentToSpec(dynComponent) componentSpec := ConvertDynamoComponentDeploymentToSpec(dynComponent)
...@@ -1150,7 +1154,7 @@ func GenerateBasePodSpecForController( ...@@ -1150,7 +1154,7 @@ func GenerateBasePodSpecForController(
// Determine backend framework using hybrid approach // Determine backend framework using hybrid approach
backendFramework, err := getBackendFrameworkFromDynamoComponent(dynComponent) backendFramework, err := getBackendFrameworkFromDynamoComponent(dynComponent)
if err != nil { if err != nil {
return corev1.PodSpec{}, fmt.Errorf("failed to determine backend framework: %w", err) return nil, fmt.Errorf("failed to determine backend framework: %w", err)
} }
// Generate base PodSpec with standard env vars using merged component envs // Generate base PodSpec with standard env vars using merged component envs
...@@ -1169,7 +1173,7 @@ func GenerateBasePodSpecForController( ...@@ -1169,7 +1173,7 @@ func GenerateBasePodSpecForController(
serviceName, serviceName,
) )
if err != nil { if err != nil {
return corev1.PodSpec{}, err return nil, err
} }
return podSpec, nil return podSpec, nil
......
...@@ -1205,6 +1205,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1205,6 +1205,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Spec: grovev1alpha1.PodGangSetSpec{ Spec: grovev1alpha1.PodGangSetSpec{
Replicas: 1, Replicas: 1,
Template: grovev1alpha1.PodGangSetTemplateSpec{ Template: grovev1alpha1.PodGangSetTemplateSpec{
StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder),
HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{ HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
}, },
...@@ -1224,8 +1225,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1224,8 +1225,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"nvidia.com/annotation2": "annotation2", "nvidia.com/annotation2": "annotation2",
}, },
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "frontend", RoleName: "frontend",
Replicas: 1, Replicas: 1,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
...@@ -1244,6 +1246,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1244,6 +1246,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Name: "frontend-secret", Name: "frontend-secret",
}, },
}, },
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -1353,8 +1356,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1353,8 +1356,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
Annotations: map[string]string{}, Annotations: map[string]string{},
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "planner", RoleName: "planner",
Replicas: 2, Replicas: 2,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
...@@ -1375,6 +1379,8 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1375,6 +1379,8 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
TerminationGracePeriodSeconds: ptr.To(int64(60)),
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -1690,10 +1696,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1690,10 +1696,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"worker-ldr", "worker-ldr",
"worker-wkr", "worker-wkr",
}, },
Replicas: ptr.To(int32(5)), Replicas: ptr.To(int32(5)),
MinAvailable: ptr.To(int32(1)),
}, },
}, },
// StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeExplicit), // StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeExplicit),
StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder),
Cliques: []*grovev1alpha1.PodCliqueTemplateSpec{ Cliques: []*grovev1alpha1.PodCliqueTemplateSpec{
{ {
Name: "worker-ldr", Name: "worker-ldr",
...@@ -1709,9 +1717,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1709,9 +1717,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"nvidia.com/annotation2": "annotation2", "nvidia.com/annotation2": "annotation2",
}, },
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "worker-ldr", RoleName: "worker-ldr",
Replicas: 1, Replicas: 1,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyAlways,
TerminationGracePeriodSeconds: ptr.To(int64(60)),
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
Name: "shared-memory", Name: "shared-memory",
...@@ -1855,10 +1866,13 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1855,10 +1866,13 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"nvidia.com/annotation2": "annotation2", "nvidia.com/annotation2": "annotation2",
}, },
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "worker-wkr", RoleName: "worker-wkr",
Replicas: 2, Replicas: 2,
MinAvailable: ptr.To(int32(1)),
// StartsAfter: []string{"worker-ldr"}, // StartsAfter: []string{"worker-ldr"},
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyAlways,
TerminationGracePeriodSeconds: ptr.To(int64(60)),
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
Name: "shared-memory", Name: "shared-memory",
...@@ -1961,8 +1975,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1961,8 +1975,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
Annotations: map[string]string{}, Annotations: map[string]string{},
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "frontend", RoleName: "frontend",
Replicas: 1, Replicas: 1,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
...@@ -1981,6 +1996,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1981,6 +1996,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
TerminationGracePeriodSeconds: ptr.To(int64(10)), TerminationGracePeriodSeconds: ptr.To(int64(10)),
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -2090,9 +2106,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2090,9 +2106,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
Annotations: map[string]string{}, Annotations: map[string]string{},
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "planner", RoleName: "planner",
Replicas: 2, Replicas: 2,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
TerminationGracePeriodSeconds: ptr.To(int64(60)),
RestartPolicy: corev1.RestartPolicyAlways,
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
Name: "planner-pvc", Name: "planner-pvc",
...@@ -2440,6 +2459,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2440,6 +2459,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Spec: grovev1alpha1.PodGangSetSpec{ Spec: grovev1alpha1.PodGangSetSpec{
Replicas: 1, Replicas: 1,
Template: grovev1alpha1.PodGangSetTemplateSpec{ Template: grovev1alpha1.PodGangSetTemplateSpec{
StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeAnyOrder),
HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{ HeadlessServiceConfig: &grovev1alpha1.HeadlessServiceConfig{
PublishNotReadyAddresses: true, PublishNotReadyAddresses: true,
}, },
...@@ -2451,7 +2471,8 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2451,7 +2471,8 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"worker-ldr", "worker-ldr",
"worker-wkr", "worker-wkr",
}, },
Replicas: ptr.To(int32(5)), Replicas: ptr.To(int32(5)),
MinAvailable: ptr.To(int32(1)),
}, },
}, },
// StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeExplicit), // StartupType: ptr.To(grovev1alpha1.CliqueStartupTypeExplicit),
...@@ -2470,8 +2491,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2470,8 +2491,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"nvidia.com/annotation2": "annotation2", "nvidia.com/annotation2": "annotation2",
}, },
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "worker-ldr", RoleName: "worker-ldr",
Replicas: 1, Replicas: 1,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
...@@ -2484,6 +2506,8 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2484,6 +2506,8 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
TerminationGracePeriodSeconds: ptr.To(int64(60)),
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -2604,10 +2628,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2604,10 +2628,12 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
"nvidia.com/annotation2": "annotation2", "nvidia.com/annotation2": "annotation2",
}, },
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "worker-wkr", RoleName: "worker-wkr",
Replicas: 2, Replicas: 2,
MinAvailable: ptr.To(int32(1)),
// StartsAfter: []string{"worker-ldr"}, // StartsAfter: []string{"worker-ldr"},
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
TerminationGracePeriodSeconds: ptr.To(int64(60)),
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
Name: "shared-memory", Name: "shared-memory",
...@@ -2619,6 +2645,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2619,6 +2645,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -2710,8 +2737,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2710,8 +2737,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
Annotations: map[string]string{}, Annotations: map[string]string{},
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "frontend", RoleName: "frontend",
Replicas: 1, Replicas: 1,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
...@@ -2730,6 +2758,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2730,6 +2758,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
TerminationGracePeriodSeconds: ptr.To(int64(10)), TerminationGracePeriodSeconds: ptr.To(int64(10)),
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
...@@ -2839,9 +2868,11 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2839,9 +2868,11 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
Annotations: map[string]string{}, Annotations: map[string]string{},
Spec: grovev1alpha1.PodCliqueSpec{ Spec: grovev1alpha1.PodCliqueSpec{
RoleName: "planner", RoleName: "planner",
Replicas: 2, Replicas: 2,
MinAvailable: ptr.To(int32(1)),
PodSpec: corev1.PodSpec{ PodSpec: corev1.PodSpec{
TerminationGracePeriodSeconds: ptr.To(int64(60)),
Volumes: []corev1.Volume{ Volumes: []corev1.Volume{
{ {
Name: "planner-pvc", Name: "planner-pvc",
...@@ -2861,6 +2892,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -2861,6 +2892,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
}, },
}, },
}, },
RestartPolicy: corev1.RestartPolicyAlways,
Containers: []corev1.Container{ Containers: []corev1.Container{
{ {
Name: "main", Name: "main",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment