Unverified Commit fcc42624 authored by Michael Shin's avatar Michael Shin Committed by GitHub
Browse files

feat: Add vLLM compilation cache to Dynamo Operator (#3257)


Signed-off-by: Michael Shin <michaelshin@users.noreply.github.com>
parent 02c822d6
......@@ -9,6 +9,9 @@ spec:
envs:
- name: DYNAMO_NAMESPACE
value: "dynamo"
pvcs:
- name: dynamo-pvc
create: false # Must be pre-created before deployment and SLA profiler must have been run
services:
Frontend:
dynamoNamespace: dynamo
......@@ -41,10 +44,9 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data
volumeMounts:
- name: dynamo-pvc
mountPoint: /data
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
......
......@@ -79,6 +79,8 @@ metadata:
name: trtllm-disagg-tp8
spec:
backendFramework: trtllm
pvcs:
- name: models
envs:
- name: OMPI_ALLOW_RUN_AS_ROOT
value: "1"
......@@ -103,9 +105,9 @@ spec:
- --http-port
- "8000"
prefill:
pvc:
name: models
mountPoint: /models
volumeMounts:
- name: models
mountPoint: /models
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
......@@ -143,9 +145,9 @@ spec:
- --disaggregation-strategy
- decode_first
decode:
pvc:
name: models
mountPoint: /models
volumeMounts:
- name: models
mountPoint: /models
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
......
......@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
metadata:
name: trtllm-disagg-planner
spec:
pvcs:
- name: dynamo-pvc
create: false
envs:
- name: DYNAMO_NAMESPACE
value: "trtllm-disagg-planner"
......@@ -58,10 +61,9 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data
volumeMounts:
- name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
......
......@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
metadata:
name: vllm-disagg-planner
spec:
pvcs:
- name: dynamo-pvc
create: false # Must be pre-created before deployment and SLA profiler must have been run
envs:
- name: DYNAMO_NAMESPACE
value: "vllm-disagg-planner"
......@@ -41,10 +44,9 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
pvc:
create: false
name: dynamo-pvc # Must be pre-created before deployment and SLA profiler must have been run
mountPoint: /data
volumeMounts:
- name: dynamo-pvc
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......
......@@ -10025,31 +10025,6 @@ spec:
required:
- nodeCount
type: object
pvc:
description: PVC config describing volumes to be mounted by the component.
properties:
create:
description: Create indicates to create a new PVC
type: boolean
mountPoint:
type: string
name:
description: Name is the name of the PVC
type: string
size:
anyOf:
- type: integer
- type: string
description: Size of the NIM cache in Gi, used during PVC creation
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
storageClass:
description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created.
type: string
volumeAccessMode:
description: VolumeAccessMode is the volume access mode of the PVC
type: string
type: object
readinessProbe:
description: ReadinessProbe to signal when the container is ready to receive traffic.
properties:
......@@ -10257,6 +10232,30 @@ spec:
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
volumeMounts:
description: VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
items:
description: VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
properties:
mountPoint:
description: |-
MountPoint specifies where to mount the volume.
If useAsCompilationCache is true and mountPoint is not specified,
a backend-specific default will be used.
type: string
name:
description: Name references a PVC name defined in the top-level PVCs list
type: string
useAsCompilationCache:
default: false
description: |-
UseAsCompilationCache indicates this volume should be used as a compilation cache.
When true, backend-specific environment variables will be set and default mount points may be used.
type: boolean
required:
- name
type: object
type: array
type: object
status:
description: Status reflects the current observed state of the component deployment.
......
......@@ -180,6 +180,38 @@ spec:
- name
type: object
type: array
pvcs:
description: |-
PVCs defines a list of persistent volume claims that can be referenced by components.
Each PVC must have a unique name that can be referenced in component specifications.
items:
properties:
create:
description: Create indicates to create a new PVC
type: boolean
name:
description: Name is the name of the PVC
type: string
size:
anyOf:
- type: integer
- type: string
description: Size of the volume in Gi, used during PVC creation. Required when create is true.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
storageClass:
description: StorageClass to be used for PVC creation. Required when create is true.
type: string
volumeAccessMode:
description: VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
type: string
required:
- name
type: object
x-kubernetes-validations:
- message: When create is true, size, storageClass, and volumeAccessMode are required
rule: '!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))'
type: array
services:
additionalProperties:
properties:
......@@ -10124,31 +10156,6 @@ spec:
required:
- nodeCount
type: object
pvc:
description: PVC config describing volumes to be mounted by the component.
properties:
create:
description: Create indicates to create a new PVC
type: boolean
mountPoint:
type: string
name:
description: Name is the name of the PVC
type: string
size:
anyOf:
- type: integer
- type: string
description: Size of the NIM cache in Gi, used during PVC creation
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
storageClass:
description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created.
type: string
volumeAccessMode:
description: VolumeAccessMode is the volume access mode of the PVC
type: string
type: object
readinessProbe:
description: ReadinessProbe to signal when the container is ready to receive traffic.
properties:
......@@ -10356,6 +10363,30 @@ spec:
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
volumeMounts:
description: VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
items:
description: VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
properties:
mountPoint:
description: |-
MountPoint specifies where to mount the volume.
If useAsCompilationCache is true and mountPoint is not specified,
a backend-specific default will be used.
type: string
name:
description: Name references a PVC name defined in the top-level PVCs list
type: string
useAsCompilationCache:
default: false
description: |-
UseAsCompilationCache indicates this volume should be used as a compilation cache.
When true, backend-specific environment variables will be set and default mount points may be used.
type: boolean
required:
- name
type: object
type: array
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
......
......@@ -23,18 +23,34 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
)
// PVC describes a persistent volume claim that is either looked up by name or
// created on demand. When create is true, size, storageClass and
// volumeAccessMode must all be set (enforced by the validation rule below).
// +kubebuilder:validation:XValidation:rule="!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))",message="When create is true, size, storageClass, and volumeAccessMode are required"
type PVC struct {
// Create indicates to create a new PVC
Create *bool `json:"create,omitempty"`
// Name is the name of the PVC
// +kubebuilder:validation:Required
Name *string `json:"name,omitempty"`
// StorageClass to be used for PVC creation. Required when create is true.
StorageClass string `json:"storageClass,omitempty"`
// Size of the volume in Gi, used during PVC creation. Required when create is true.
Size resource.Quantity `json:"size,omitempty"`
// VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"`
MountPoint *string `json:"mountPoint,omitempty"` // NOTE(review): the mount path is also carried by VolumeMount.MountPoint — confirm whether this field is still needed here.
}
// VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
type VolumeMount struct {
// Name references a PVC name defined in the top-level PVCs list
// +kubebuilder:validation:Required
Name string `json:"name,omitempty"`
// MountPoint specifies where to mount the volume.
// If useAsCompilationCache is true and mountPoint is not specified,
// a backend-specific default will be used.
MountPoint string `json:"mountPoint,omitempty"` // NOTE(review): the backend defaults are not defined in this type — presumably constants such as DefaultVLLMCacheMountPoint; confirm.
// UseAsCompilationCache indicates this volume should be used as a compilation cache.
// When true, backend-specific environment variables will be set and default mount points may be used.
// +kubebuilder:default=false
UseAsCompilationCache bool `json:"useAsCompilationCache,omitempty"`
}
type Autoscaling struct {
......
......@@ -89,8 +89,8 @@ type DynamoComponentDeploymentSharedSpec struct {
// EnvFromSecret references a Secret whose key/value pairs will be exposed as
// environment variables in the component containers.
EnvFromSecret *string `json:"envFromSecret,omitempty"`
// PVC config describing volumes to be mounted by the component.
PVC *PVC `json:"pvc,omitempty"`
// VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"`
// Ingress config to expose the component outside the cluster (or through a service mesh).
Ingress *IngressSpec `json:"ingress,omitempty"`
......
......@@ -25,7 +25,9 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
)
func TestDynamoComponentDeployment_IsFrontendComponent(t *testing.T) {
......@@ -261,3 +263,160 @@ func TestDynamoComponentDeployment_GetParentGraphDeploymentName(t *testing.T) {
})
}
}
// TestDynamoComponentDeploymentSharedSpec_VolumeMounts checks that the
// VolumeMounts slice placed on a DynamoComponentDeploymentSharedSpec literal
// is read back with the expected length and contents (including the nil case).
func TestDynamoComponentDeploymentSharedSpec_VolumeMounts(t *testing.T) {
	type testCase struct {
		name               string
		spec               DynamoComponentDeploymentSharedSpec
		expectedMountCount int
		expectedMounts     []VolumeMount
	}

	// Named mounts shared between the "given" spec and the "want" slice.
	dataMount := VolumeMount{Name: "data-pvc", MountPoint: "/data"}
	logsMount := VolumeMount{Name: "logs-pvc", MountPoint: "/logs"}
	sharedMount := VolumeMount{Name: "shared-storage", MountPoint: "/shared"}

	cases := []testCase{
		{
			name: "Spec with multiple volume mounts",
			spec: DynamoComponentDeploymentSharedSpec{
				VolumeMounts: []VolumeMount{dataMount, logsMount},
			},
			expectedMountCount: 2,
			expectedMounts:     []VolumeMount{dataMount, logsMount},
		},
		{
			name: "Spec with single volume mount",
			spec: DynamoComponentDeploymentSharedSpec{
				VolumeMounts: []VolumeMount{sharedMount},
			},
			expectedMountCount: 1,
			expectedMounts:     []VolumeMount{sharedMount},
		},
		{
			name: "Spec without volume mounts",
			spec: DynamoComponentDeploymentSharedSpec{
				VolumeMounts: nil,
			},
			expectedMountCount: 0,
			expectedMounts:     nil,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := tc.spec.VolumeMounts

			if n := len(got); n != tc.expectedMountCount {
				t.Errorf("Volume mount count = %v, want %v", n, tc.expectedMountCount)
			}
			if !reflect.DeepEqual(got, tc.expectedMounts) {
				t.Errorf("VolumeMounts = %v, want %v", got, tc.expectedMounts)
			}
		})
	}
}
// TestPVC_Validation walks a table of PVC literals and checks the
// "create implies size, storageClass and volumeAccessMode" rule.
// The rule is re-evaluated in Go inside the loop below; the CEL expression
// attached to the PVC type is not itself executed by this test.
func TestPVC_Validation(t *testing.T) {
	type testCase struct {
		name        string
		pvc         PVC
		expectValid bool
		description string
	}

	cases := []testCase{
		{
			name:        "valid PVC with create false",
			pvc:         PVC{Create: ptr.To(false), Name: ptr.To("test-pvc")},
			expectValid: true,
			description: "When create is false, size/storageClass/volumeAccessMode are not required",
		},
		{
			name:        "valid PVC with create nil (omitted)",
			pvc:         PVC{Name: ptr.To("test-pvc")},
			expectValid: true,
			description: "When create is omitted, size/storageClass/volumeAccessMode are not required",
		},
		{
			name: "valid PVC with create true and all required fields",
			pvc: PVC{
				Create:           ptr.To(true),
				Name:             ptr.To("test-pvc"),
				Size:             resource.MustParse("10Gi"),
				StorageClass:     "fast-ssd",
				VolumeAccessMode: corev1.ReadWriteOnce,
			},
			expectValid: true,
			description: "When create is true and all required fields are provided",
		},
		{
			name: "invalid PVC with create true but missing size",
			pvc: PVC{
				Create:           ptr.To(true),
				Name:             ptr.To("test-pvc"),
				StorageClass:     "fast-ssd",
				VolumeAccessMode: corev1.ReadWriteOnce,
			},
			expectValid: false,
			description: "When create is true but size is missing",
		},
		{
			name: "invalid PVC with create true but missing storageClass",
			pvc: PVC{
				Create:           ptr.To(true),
				Name:             ptr.To("test-pvc"),
				Size:             resource.MustParse("10Gi"),
				VolumeAccessMode: corev1.ReadWriteOnce,
			},
			expectValid: false,
			description: "When create is true but storageClass is missing",
		},
		{
			name: "invalid PVC with create true but missing volumeAccessMode",
			pvc: PVC{
				Create:       ptr.To(true),
				Name:         ptr.To("test-pvc"),
				Size:         resource.MustParse("10Gi"),
				StorageClass: "fast-ssd",
			},
			expectValid: false,
			description: "When create is true but volumeAccessMode is missing",
		},
		{
			name:        "invalid PVC with create true but missing all required fields",
			pvc:         PVC{Create: ptr.To(true), Name: ptr.To("test-pvc")},
			expectValid: false,
			description: "When create is true but all required fields are missing",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			createRequested := tc.pvc.Create != nil && *tc.pvc.Create

			// Without create=true nothing else is required, so the case must
			// have been declared valid.
			if !createRequested {
				if !tc.expectValid {
					t.Errorf("PVC validation should be valid when create is false/nil. %s", tc.description)
				}
				return
			}

			hasSize := !tc.pvc.Size.IsZero()
			hasStorageClass := tc.pvc.StorageClass != ""
			hasVolumeAccessMode := tc.pvc.VolumeAccessMode != ""
			isValid := hasSize && hasStorageClass && hasVolumeAccessMode

			if isValid == tc.expectValid {
				return
			}
			t.Errorf("PVC validation = %v, expected %v. %s", isValid, tc.expectValid, tc.description)
			t.Errorf("  hasSize: %v, hasStorageClass: %v, hasVolumeAccessMode: %v", hasSize, hasStorageClass, hasVolumeAccessMode)
		})
	}
}
......@@ -32,6 +32,10 @@ type DynamoGraphDeploymentSpec struct {
// DynamoGraph selects the graph (workflow/topology) to deploy. This must match
// a graph name packaged with the Dynamo archive.
DynamoGraph string `json:"dynamoGraph,omitempty"`
// PVCs defines a list of persistent volume claims that can be referenced by components.
// Each PVC must have a unique name that can be referenced in component specifications.
// +kubebuilder:validation:Optional
PVCs []PVC `json:"pvcs,omitempty"`
// Services allows per-service overrides of the component deployment settings.
// - key: name of the service defined by the DynamoComponent
// - value: overrides for that service
......
......@@ -233,10 +233,10 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
*out = new(string)
**out = **in
}
if in.PVC != nil {
in, out := &in.PVC, &out.PVC
*out = new(PVC)
(*in).DeepCopyInto(*out)
if in.VolumeMounts != nil {
in, out := &in.VolumeMounts, &out.VolumeMounts
*out = make([]VolumeMount, len(*in))
copy(*out, *in)
}
if in.Ingress != nil {
in, out := &in.Ingress, &out.Ingress
......@@ -397,6 +397,13 @@ func (in *DynamoGraphDeploymentList) DeepCopyObject() runtime.Object {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec) {
*out = *in
if in.PVCs != nil {
in, out := &in.PVCs, &out.PVCs
*out = make([]PVC, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
if in.Services != nil {
in, out := &in.Services, &out.Services
*out = make(map[string]*DynamoComponentDeploymentOverridesSpec, len(*in))
......@@ -552,11 +559,6 @@ func (in *PVC) DeepCopyInto(out *PVC) {
**out = **in
}
out.Size = in.Size.DeepCopy()
if in.MountPoint != nil {
in, out := &in.MountPoint, &out.MountPoint
*out = new(string)
**out = **in
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PVC.
......@@ -584,3 +586,18 @@ func (in *SharedMemorySpec) DeepCopy() *SharedMemorySpec {
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *VolumeMount) DeepCopyInto(out *VolumeMount) {
*out = *in
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VolumeMount.
func (in *VolumeMount) DeepCopy() *VolumeMount {
if in == nil {
return nil
}
out := new(VolumeMount)
in.DeepCopyInto(out)
return out
}
......@@ -10025,31 +10025,6 @@ spec:
required:
- nodeCount
type: object
pvc:
description: PVC config describing volumes to be mounted by the component.
properties:
create:
description: Create indicates to create a new PVC
type: boolean
mountPoint:
type: string
name:
description: Name is the name of the PVC
type: string
size:
anyOf:
- type: integer
- type: string
description: Size of the NIM cache in Gi, used during PVC creation
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
storageClass:
description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created.
type: string
volumeAccessMode:
description: VolumeAccessMode is the volume access mode of the PVC
type: string
type: object
readinessProbe:
description: ReadinessProbe to signal when the container is ready to receive traffic.
properties:
......@@ -10257,6 +10232,30 @@ spec:
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
volumeMounts:
description: VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
items:
description: VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
properties:
mountPoint:
description: |-
MountPoint specifies where to mount the volume.
If useAsCompilationCache is true and mountPoint is not specified,
a backend-specific default will be used.
type: string
name:
description: Name references a PVC name defined in the top-level PVCs list
type: string
useAsCompilationCache:
default: false
description: |-
UseAsCompilationCache indicates this volume should be used as a compilation cache.
When true, backend-specific environment variables will be set and default mount points may be used.
type: boolean
required:
- name
type: object
type: array
type: object
status:
description: Status reflects the current observed state of the component deployment.
......
......@@ -180,6 +180,38 @@ spec:
- name
type: object
type: array
pvcs:
description: |-
PVCs defines a list of persistent volume claims that can be referenced by components.
Each PVC must have a unique name that can be referenced in component specifications.
items:
properties:
create:
description: Create indicates to create a new PVC
type: boolean
name:
description: Name is the name of the PVC
type: string
size:
anyOf:
- type: integer
- type: string
description: Size of the volume in Gi, used during PVC creation. Required when create is true.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
storageClass:
description: StorageClass to be used for PVC creation. Required when create is true.
type: string
volumeAccessMode:
description: VolumeAccessMode is the volume access mode of the PVC. Required when create is true.
type: string
required:
- name
type: object
x-kubernetes-validations:
- message: When create is true, size, storageClass, and volumeAccessMode are required
rule: '!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))'
type: array
services:
additionalProperties:
properties:
......@@ -10124,31 +10156,6 @@ spec:
required:
- nodeCount
type: object
pvc:
description: PVC config describing volumes to be mounted by the component.
properties:
create:
description: Create indicates to create a new PVC
type: boolean
mountPoint:
type: string
name:
description: Name is the name of the PVC
type: string
size:
anyOf:
- type: integer
- type: string
description: Size of the NIM cache in Gi, used during PVC creation
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
storageClass:
description: StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created.
type: string
volumeAccessMode:
description: VolumeAccessMode is the volume access mode of the PVC
type: string
type: object
readinessProbe:
description: ReadinessProbe to signal when the container is ready to receive traffic.
properties:
......@@ -10356,6 +10363,30 @@ spec:
subComponentType:
description: SubComponentType indicates the sub-role of this component (for example, "prefill").
type: string
volumeMounts:
description: VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
items:
description: VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
properties:
mountPoint:
description: |-
MountPoint specifies where to mount the volume.
If useAsCompilationCache is true and mountPoint is not specified,
a backend-specific default will be used.
type: string
name:
description: Name references a PVC name defined in the top-level PVCs list
type: string
useAsCompilationCache:
default: false
description: |-
UseAsCompilationCache indicates this volume should be used as a compilation cache.
When true, backend-specific environment variables will be set and default mount points may be used.
type: boolean
required:
- name
type: object
type: array
type: object
description: |-
Services allows per-service overrides of the component deployment settings.
......
......@@ -65,6 +65,9 @@ const (
DefaultSharedMemoryMountPath = "/dev/shm"
DefaultSharedMemorySize = "8Gi"
// Compilation cache default mount points
DefaultVLLMCacheMountPoint = "/root/.cache/vllm"
// Kai-scheduler related constants
KubeAnnotationKaiSchedulerQueue = "nvidia.com/kai-scheduler-queue" // User-provided annotation to specify queue name
KubeLabelKaiSchedulerQueue = "kai.scheduler/queue" // Label injected into pods for kai-scheduler
......
......@@ -50,7 +50,6 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
......@@ -189,13 +188,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
modified := false
// Reconcile PVC
_, err = r.reconcilePVC(ctx, dynamoComponentDeployment)
if err != nil {
logs.Error(err, "Unable to create PVC", "crd", req.NamespacedName)
return ctrl.Result{}, err
}
// Create the appropriate workload resource based on deployment type
var leaderWorkerSets []*leaderworkersetv1.LeaderWorkerSet
var deployment *appsv1.Deployment
......@@ -681,41 +673,6 @@ func IsDeploymentReady(deployment *appsv1.Deployment) bool {
return false
}
func (r *DynamoComponentDeploymentReconciler) reconcilePVC(ctx context.Context, crd *v1alpha1.DynamoComponentDeployment) (*corev1.PersistentVolumeClaim, error) {
logger := log.FromContext(ctx)
if crd.Spec.PVC == nil {
return nil, nil
}
pvcConfig := *crd.Spec.PVC
pvc := &corev1.PersistentVolumeClaim{}
pvcName := types.NamespacedName{Name: getPvcName(crd, pvcConfig.Name), Namespace: crd.GetNamespace()}
err := r.Get(ctx, pvcName, pvc)
if err != nil && client.IgnoreNotFound(err) != nil {
logger.Error(err, "Unable to retrieve PVC", "crd", crd.GetName())
return nil, err
}
// If PVC does not exist, create a new one
if err != nil {
if pvcConfig.Create == nil || !*pvcConfig.Create {
logger.Error(err, "Unknown PVC", "pvc", pvc.Name)
return nil, err
}
pvc = constructPVC(crd, pvcConfig)
if err := controllerutil.SetControllerReference(crd, pvc, r.Client.Scheme()); err != nil {
logger.Error(err, "Failed to set controller reference", "pvc", pvc.Name)
return nil, err
}
err = r.Create(ctx, pvc)
if err != nil {
logger.Error(err, "Failed to create pvc", "pvc", pvc.Name)
return nil, err
}
logger.Info("PVC created", "pvc", pvcName)
}
return pvc, nil
}
func (r *DynamoComponentDeploymentReconciler) setStatusConditions(ctx context.Context, req ctrl.Request, conditions ...metav1.Condition) (dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, err error) {
dynamoComponentDeployment = &v1alpha1.DynamoComponentDeployment{}
maxRetries := 3
......
......@@ -32,11 +32,13 @@ import (
networkingv1 "k8s.io/api/networking/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/scale"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
......@@ -155,6 +157,14 @@ type Resource interface {
func (r *DynamoGraphDeploymentReconciler) reconcileResources(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) (State, Reason, Message, error) {
logger := log.FromContext(ctx)
// Reconcile top-level PVCs first
err := r.reconcilePVCs(ctx, dynamoDeployment)
if err != nil {
logger.Error(err, "Failed to reconcile top-level PVCs")
return "", "", "", fmt.Errorf("failed to reconcile top-level PVCs: %w", err)
}
// Orchestrator selection via single boolean annotation: nvidia.com/enable-grove
// Unset or not "false": Grove if available; else component mode
// "false": component mode (multinode -> LWS; single-node -> standard)
......@@ -408,6 +418,68 @@ func (r *DynamoGraphDeploymentReconciler) reconcileDynamoComponentsDeployments(c
return r.checkResourcesReadiness(resources)
}
// reconcilePVC makes sure the top-level PVC called pvcName exists in the
// namespace of dynamoDeployment.
//
//   - If the claim is found it is returned unchanged.
//   - If the lookup fails for any reason other than "not found", that error is returned.
//   - If the claim is missing and pvcConfig.Create is nil or false, the
//     not-found error is returned.
//   - Otherwise a claim is built with constructPVC, given dynamoDeployment as
//     its controller owner, created, and returned.
func (r *DynamoGraphDeploymentReconciler) reconcilePVC(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment, pvcName string, pvcConfig nvidiacomv1alpha1.PVC) (*corev1.PersistentVolumeClaim, error) {
	logger := log.FromContext(ctx)

	existing := &corev1.PersistentVolumeClaim{}
	key := types.NamespacedName{Namespace: dynamoDeployment.Namespace, Name: pvcName}

	lookupErr := r.Get(ctx, key, existing)
	if lookupErr == nil {
		// Already present: nothing to create.
		return existing, nil
	}
	if client.IgnoreNotFound(lookupErr) != nil {
		logger.Error(lookupErr, "Unable to retrieve top-level PVC", "pvcName", pvcName)
		return nil, lookupErr
	}

	// From here on the claim is known to be missing.
	createRequested := pvcConfig.Create != nil && *pvcConfig.Create
	if !createRequested {
		logger.Error(lookupErr, "Top-level PVC does not exist and create is not enabled", "pvcName", pvcName)
		return nil, lookupErr
	}

	claim := constructPVC(dynamoDeployment, pvcConfig)
	if ownerErr := controllerutil.SetControllerReference(dynamoDeployment, claim, r.Client.Scheme()); ownerErr != nil {
		logger.Error(ownerErr, "Failed to set controller reference for top-level PVC", "pvcName", pvcName)
		return nil, ownerErr
	}
	if createErr := r.Create(ctx, claim); createErr != nil {
		logger.Error(createErr, "Failed to create top-level PVC", "pvcName", pvcName)
		return nil, createErr
	}

	logger.Info("Top-level PVC created", "pvcName", pvcName, "namespace", dynamoDeployment.Namespace)
	return claim, nil
}
// reconcilePVCs reconciles every entry of dynamoDeployment.Spec.PVCs in order.
// Entries whose name is nil or empty are logged and skipped; the first error
// returned by reconcilePVC aborts the loop and is returned to the caller.
// An unset or empty PVC list is a no-op.
func (r *DynamoGraphDeploymentReconciler) reconcilePVCs(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) error {
	logger := log.FromContext(ctx)

	claims := dynamoDeployment.Spec.PVCs
	for idx := range claims {
		claim := claims[idx]

		if claim.Name == nil || *claim.Name == "" {
			logger.Error(nil, "PVC not reconcilable: name is required", "pvcConfig", claim)
			continue
		}

		name := *claim.Name
		logger.Info("Reconciling top-level PVC", "pvcName", name, "namespace", dynamoDeployment.Namespace)

		if _, err := r.reconcilePVC(ctx, dynamoDeployment, name, claim); err != nil {
			return err
		}
	}

	return nil
}
func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) error {
// for now doing nothing
return nil
......@@ -427,6 +499,13 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
UpdateFunc: func(de event.UpdateEvent) bool { return true },
GenericFunc: func(ge event.GenericEvent) bool { return true },
})).
Owns(&corev1.PersistentVolumeClaim{}, builder.WithPredicates(predicate.Funcs{
// ignore creation cause we don't want to be called again after we create the PVC
CreateFunc: func(ce event.CreateEvent) bool { return false },
DeleteFunc: func(de event.DeleteEvent) bool { return true },
UpdateFunc: func(de event.UpdateEvent) bool { return true },
GenericFunc: func(ge event.GenericEvent) bool { return true },
})).
WithEventFilter(commonController.EphemeralDeploymentEventFilter(r.Config))
if r.Config.Grove.Enabled {
ctrlBuilder = ctrlBuilder.Owns(&grovev1alpha1.PodCliqueSet{}, builder.WithPredicates(predicate.Funcs{
......
......@@ -7,6 +7,7 @@ import (
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
)
const (
......@@ -26,6 +27,20 @@ func isPythonCommand(cmd string) bool {
}
func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
// Check for volumeMounts with useAsCompilationCache=true
for _, volumeMount := range component.VolumeMounts {
if volumeMount.UseAsCompilationCache {
logger := log.Log.WithName("sglang-backend")
logger.Info("Compilation cache configured for SGLang but not yet fully supported",
"backend", "sglang",
"status", "partial-support",
"cache-dir", volumeMount.MountPoint,
"use-as-compilation-cache", true,
"env-vars-set", false,
"next-steps", "upstream SGLang changes needed")
}
}
// For single node, nothing to do
if numberOfNodes <= 1 {
return
......
......@@ -301,7 +301,6 @@ func TestSGLangBackend_ShellCommandInjection(t *testing.T) {
t.Errorf("UpdateContainer() args = %v, want %v", container.Args, tt.expectedArgs)
}
// Verify command is still sh -c for shell commands
expectedCommand := tt.initialCommand
if !reflect.DeepEqual(container.Command, expectedCommand) {
t.Errorf("UpdateContainer() should preserve shell command, got: %v, want: %v", container.Command, expectedCommand)
......@@ -438,7 +437,6 @@ func TestSGLangBackend_ProbeRemoval(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create initial probes
livenessProbe := &corev1.Probe{InitialDelaySeconds: 30}
readinessProbe := &corev1.Probe{InitialDelaySeconds: 10}
startupProbe := &corev1.Probe{InitialDelaySeconds: 5}
......@@ -476,3 +474,101 @@ func TestSGLangBackend_ProbeRemoval(t *testing.T) {
})
}
}
func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
backend := &SGLangBackend{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
volumeMounts []corev1.VolumeMount
expectNoEnvVarChanges bool
expectLoggedPartialSupport bool
}{
{
name: "SGLang backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "sglang-cache",
MountPoint: "/cache/sglang",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true, // SGLang doesn't set env vars yet
expectLoggedPartialSupport: true,
},
{
name: "SGLang backend with useAsCompilationCache at custom volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "custom-cache",
MountPoint: "/custom/cache/path",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true, // SGLang doesn't set env vars yet
expectLoggedPartialSupport: true,
},
{
name: "SGLang backend without useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "regular-volume",
MountPoint: "/data",
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true,
expectLoggedPartialSupport: false,
},
{
name: "SGLang backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: nil,
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true,
expectLoggedPartialSupport: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a container with initial state including volume mounts
container := &corev1.Container{
Env: []corev1.EnvVar{},
VolumeMounts: tt.volumeMounts,
}
// Store original env vars for comparison
originalEnvCount := len(container.Env)
// Call UpdateContainer (single node to avoid multinode logic)
backend.UpdateContainer(container, 1, RoleMain, tt.component, "test-service", &GroveMultinodeDeployer{})
if tt.expectNoEnvVarChanges {
// Check that no new environment variables were added
if len(container.Env) != originalEnvCount {
t.Errorf("Expected no environment variable changes, but env count changed from %d to %d", originalEnvCount, len(container.Env))
}
}
})
}
}
......@@ -11,6 +11,7 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"sigs.k8s.io/controller-runtime/pkg/log"
)
type TRTLLMBackend struct {
......@@ -18,6 +19,19 @@ type TRTLLMBackend struct {
}
func (b *TRTLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
// Check for volumeMounts with useAsCompilationCache=true
for _, volumeMount := range component.VolumeMounts {
if volumeMount.UseAsCompilationCache {
logger := log.Log.WithName("trtllm-backend")
logger.Info("Compilation cache configured for TensorRT-LLM but not yet fully supported",
"backend", "trtllm",
"status", "partial-support",
"use-as-compilation-cache", true,
"env-vars-set", false,
"next-steps", "upstream TensorRT-LLM changes needed")
}
}
// For single node, nothing to do
if numberOfNodes <= 1 {
return
......
......@@ -143,10 +143,8 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
StartupProbe: &corev1.Probe{},
}
// Call UpdateContainer
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, tt.component, "test-service", tt.multinodeDeployer)
// Use helper functions to validate results
validateVolumeMounts(t, container, tt.expectedVolumeMounts)
validateCommand(t, container, tt.expectedCommand)
validateArgs(t, container, tt.expectedArgs)
......@@ -816,3 +814,100 @@ func TestTRTLLMBackend_getGPUsPerNode(t *testing.T) {
})
}
}
func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
backend := &TRTLLMBackend{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
volumeMounts []corev1.VolumeMount
expectNoEnvVarChanges bool
expectLoggedPartialSupport bool
}{
{
name: "TensorRT-LLM backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "trtllm-cache",
MountPoint: "/cache/trtllm",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true, // TensorRT-LLM doesn't set env vars yet
expectLoggedPartialSupport: true,
},
{
name: "TensorRT-LLM backend with useAsCompilationCache at custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "custom-cache",
MountPoint: "/custom/cache/path",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true, // TensorRT-LLM doesn't set env vars yet
expectLoggedPartialSupport: true,
},
{
name: "TensorRT-LLM backend without useAsCompilationCache",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "regular-volume",
MountPoint: "/data",
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true,
expectLoggedPartialSupport: false,
},
{
name: "TensorRT-LLM backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: nil,
},
},
volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true,
expectLoggedPartialSupport: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create a container with initial state including volume mounts
container := &corev1.Container{
Env: []corev1.EnvVar{},
VolumeMounts: tt.volumeMounts,
}
// Store original env vars for comparison
originalEnvCount := len(container.Env)
// Call UpdateContainer (single node to avoid multinode logic)
backend.UpdateContainer(container, 1, RoleMain, tt.component, "test-service", &GroveMultinodeDeployer{})
if tt.expectNoEnvVarChanges {
// Check that no new environment variables were added
if len(container.Env) != originalEnvCount {
t.Errorf("Expected no environment variable changes, but env count changed from %d to %d", originalEnvCount, len(container.Env))
}
}
})
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment