Unverified commit 80279ad3, authored by julienmancuso and committed by GitHub
Browse files

fix: increase shm default size and make it configurable (#2616)

parent 8ad558d4
......@@ -10241,6 +10241,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
status:
description: Status reflects the current observed state of the component deployment.
......
......@@ -10340,6 +10340,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
......
......@@ -44,3 +44,8 @@ type Autoscaling struct {
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
}
// SharedMemorySpec controls the tmpfs mounted at /dev/shm (enable/disable and size).
// When no spec is provided, generateSharedMemoryVolumeAndMount keeps the volume
// enabled and sizes it to DefaultSharedMemorySize (8Gi).
type SharedMemorySpec struct {
// Disabled, when true, suppresses the shared-memory volume and its mount entirely.
Disabled bool `json:"disabled,omitempty"`
// Size is the size limit applied to the memory-backed emptyDir volume.
// A zero quantity (which includes an unset field) falls back to DefaultSharedMemorySize.
Size resource.Quantity `json:"size,omitempty"`
}
......@@ -92,6 +92,9 @@ type DynamoComponentDeploymentSharedSpec struct {
// Ingress config to expose the component outside the cluster (or through a service mesh).
Ingress *IngressSpec `json:"ingress,omitempty"`
// SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
SharedMemory *SharedMemorySpec `json:"sharedMemory,omitempty"`
// +optional
// ExtraPodMetadata adds labels/annotations to the created Pods.
ExtraPodMetadata *dynamoCommon.ExtraPodMetadata `json:"extraPodMetadata,omitempty"`
......
......@@ -243,6 +243,11 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
*out = new(IngressSpec)
(*in).DeepCopyInto(*out)
}
if in.SharedMemory != nil {
in, out := &in.SharedMemory, &out.SharedMemory
*out = new(SharedMemorySpec)
(*in).DeepCopyInto(*out)
}
if in.ExtraPodMetadata != nil {
in, out := &in.ExtraPodMetadata, &out.ExtraPodMetadata
*out = new(common.ExtraPodMetadata)
......@@ -563,3 +568,19 @@ func (in *PVC) DeepCopy() *PVC {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *SharedMemorySpec) DeepCopyInto(out *SharedMemorySpec) {
// Shallow-copy every field first; this already covers the plain bool Disabled.
*out = *in
// Size is a resource.Quantity, so it is re-copied through its own DeepCopy
// instead of relying on the shallow struct assignment above.
out.Size = in.Size.DeepCopy()
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SharedMemorySpec.
// A nil receiver returns nil.
func (in *SharedMemorySpec) DeepCopy() *SharedMemorySpec {
if in == nil {
return nil
}
// Allocate a fresh value and let DeepCopyInto populate it.
out := new(SharedMemorySpec)
in.DeepCopyInto(out)
return out
}
......@@ -10241,6 +10241,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
status:
description: Status reflects the current observed state of the component deployment.
......
......@@ -10340,6 +10340,18 @@ spec:
serviceName:
description: contains the name of the component
type: string
sharedMemory:
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
properties:
disabled:
type: boolean
size:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
......
......@@ -48,9 +48,11 @@ const (
DefaultGroveTerminationDelay = 15 * time.Minute
// Metrics related constants
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
KubeValueNameSharedMemory = "shared-memory"
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
KubeValueNameSharedMemory = "shared-memory"
DefaultSharedMemoryMountPath = "/dev/shm"
DefaultSharedMemorySize = "8Gi"
// Grove multinode role suffixes
GroveRoleSuffixLeader = "ldr"
......
......@@ -24,7 +24,6 @@ import (
"fmt"
"testing"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
......@@ -705,18 +704,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2,
},
Resources: &common.Resources{
Requests: &common.ResourceItem{
Resources: &dynamoCommon.Resources{
Requests: &dynamoCommon.ResourceItem{
CPU: "300m",
Memory: "500Mi",
},
Limits: &common.ResourceItem{
Limits: &dynamoCommon.ResourceItem{
GPU: "1",
Memory: "20Gi",
CPU: "10",
},
},
ExtraPodMetadata: &common.ExtraPodMetadata{
ExtraPodMetadata: &dynamoCommon.ExtraPodMetadata{
Annotations: map[string]string{
"nvidia.com/annotation1": "annotation1",
},
......@@ -799,7 +798,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -829,7 +828,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
VolumeMounts: []corev1.VolumeMount{
{
Name: "shared-memory",
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
Resources: corev1.ResourceRequirements{
......@@ -908,7 +907,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -938,7 +937,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
VolumeMounts: []corev1.VolumeMount{
{
Name: "shared-memory",
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
Resources: corev1.ResourceRequirements{
......@@ -980,8 +979,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2,
},
Resources: &common.Resources{
Limits: &common.ResourceItem{
Resources: &dynamoCommon.Resources{
Limits: &dynamoCommon.ResourceItem{
GPU: "1",
},
},
......@@ -1024,8 +1023,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2,
},
Resources: &common.Resources{
Limits: &common.ResourceItem{
Resources: &dynamoCommon.Resources{
Limits: &dynamoCommon.ResourceItem{
GPU: "1",
},
},
......
......@@ -677,6 +677,8 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller
// GenerateBasePodSpec creates a basic PodSpec with common logic shared between controller and grove
// Includes standard environment variables (DYNAMO_PORT, NATS_SERVER, ETCD_ENDPOINTS)
// Deployment-specific environment merging should be handled by the caller
//
//nolint:gocyclo
func GenerateBasePodSpec(
component *v1alpha1.DynamoComponentDeploymentOverridesSpec,
backendFramework BackendFramework,
......@@ -780,9 +782,10 @@ func GenerateBasePodSpec(
MountPath: *component.PVC.MountPoint,
})
}
shmVolume, shmVolumeMount := generateSharedMemoryVolumeAndMount(&container.Resources)
volumes = append(volumes, shmVolume)
container.VolumeMounts = append(container.VolumeMounts, shmVolumeMount)
if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
volumes = append(volumes, *shmVol)
container.VolumeMounts = append(container.VolumeMounts, *shmMount)
}
// Apply backend-specific container modifications
multinodeDeployer := MultinodeDeployerFactory(multinodeDeploymentType)
......@@ -1181,36 +1184,29 @@ func GenerateBasePodSpecForController(
return podSpec, nil
}
func generateSharedMemoryVolumeAndMount(resources *corev1.ResourceRequirements) (corev1.Volume, corev1.VolumeMount) {
sharedMemorySizeLimit := resource.MustParse("512Mi")
// Check if we have memory limits to work with
memoryLimit := resources.Limits[corev1.ResourceMemory]
if !memoryLimit.IsZero() {
// Use 1/4 of memory limit
calculatedSize := resource.NewQuantity(memoryLimit.Value()/4, resource.BinarySI)
// Apply bounds: minimum 512Mi, maximum 8Gi
minSize := resource.MustParse("512Mi")
maxSize := resource.MustParse("8Gi")
if calculatedSize.Cmp(minSize) > 0 && calculatedSize.Cmp(maxSize) < 0 {
sharedMemorySizeLimit = *calculatedSize
} else if calculatedSize.Cmp(maxSize) >= 0 {
sharedMemorySizeLimit = maxSize // Cap at maximum
func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
// default: enabled=true, size=8Gi
size := resource.MustParse(commonconsts.DefaultSharedMemorySize)
if spec != nil {
if spec.Disabled {
return nil, nil
}
if !spec.Size.IsZero() {
size = spec.Size
}
// If calculatedSize < minSize, keep the 512Mi base
}
volume := corev1.Volume{
Name: commonconsts.KubeValueNameSharedMemory,
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: &sharedMemorySizeLimit,
SizeLimit: &size,
},
},
}
volumeMount := corev1.VolumeMount{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
}
return volume, volumeMount
return &volume, &volumeMount
}
......@@ -1235,7 +1235,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(536870912, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -1337,7 +1337,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: "shared-memory",
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
Ports: []corev1.ContainerPort{
......@@ -1378,7 +1378,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(536870912, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -1471,7 +1471,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
{
Name: "shared-memory",
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......@@ -1733,7 +1733,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -1812,7 +1812,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
LivenessProbe: &corev1.Probe{
......@@ -1883,7 +1883,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -1962,7 +1962,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......@@ -1989,7 +1989,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -2098,7 +2098,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......@@ -2134,7 +2134,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -2225,7 +2225,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
{
Name: "shared-memory",
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......@@ -2509,7 +2509,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -2590,7 +2590,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
ReadinessProbe: &corev1.Probe{
......@@ -2648,7 +2648,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -2728,7 +2728,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......@@ -2755,7 +2755,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -2864,7 +2864,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeMounts: []corev1.VolumeMount{
{
Name: commonconsts.KubeValueNameSharedMemory,
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......@@ -2899,7 +2899,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{
Medium: corev1.StorageMediumMemory,
SizeLimit: resource.NewQuantity(512*1024*1024, resource.BinarySI),
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
},
},
},
......@@ -2991,7 +2991,7 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
{
Name: "shared-memory",
MountPath: "/dev/shm",
MountPath: commonconsts.DefaultSharedMemoryMountPath,
},
},
},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment