feat: Add vLLM compilation cache to Dynamo Operator (#3257)

Signed-off-by: Michael Shin <michaelshin@users.noreply.github.com>

feat: Add vLLM compilation cache to Dynamo Operator (#3257)
Signed-off-by: Michael Shin <michaelshin@users.noreply.github.com>
fcc42624 · Michael Shin · GitHub · 02c822d6 · fcc42624 · fcc42624
Unverified commit fcc42624, authored Oct 01, 2025 by Michael Shin; committed by GitHub Oct 01, 2025
13 changed files
--- a/deploy/cloud/operator/internal/dynamo/backend_vllm.go
+++ b/deploy/cloud/operator/internal/dynamo/backend_vllm.go
@@ -6,6 +6,7 @@ import (

 	"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
 	corev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/log"
 )

 const (
@@ -28,6 +29,35 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
 			container.StartupProbe = nil
 		}
 	}
+
+	// Set compilation cache environment variables for VLLM
+	cacheDir := ""
+
+	// Check for volumeMounts with useAsCompilationCache=true
+	for _, volumeMount := range component.VolumeMounts {
+		if volumeMount.UseAsCompilationCache {
+			cacheDir = volumeMount.MountPoint
+			break
+		}
+	}
+
+	if cacheDir != "" {
+		// Set VLLM cache directory using the environment variable
+		container.Env = append(container.Env, corev1.EnvVar{
+			Name:  "VLLM_CACHE_ROOT",
+			Value: cacheDir,
+		})
+
+		// Log confirmation that compilation cache is configured for VLLM
+		logger := log.Log.WithName("vllm-backend")
+		logger.Info("Compilation cache configured and enabled for VLLM backend",
+			"backend", "vllm",
+			"status", "fully-supported",
+			"cache-dir", cacheDir,
+			"use-as-compilation-cache", true,
+			"env-vars-set", true,
+			"env-vars", "VLLM_CACHE_ROOT")
+	}
 }

 func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) {

--- a/deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
+++ b/deploy/cloud/operator/internal/dynamo/backend_vllm_test.go
@@ -126,6 +126,118 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
 	}
 }

+func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { // table-driven check of the VLLM_CACHE_ROOT env var that UpdateContainer adds for compilation-cache mounts
+	backend := &VLLMBackend{}
+
+	tests := []struct {
+		name                  string
+		component             *v1alpha1.DynamoComponentDeploymentOverridesSpec
+		volumeMounts          []corev1.VolumeMount
+		expectCacheEnvVar     bool
+		expectCacheEnvVarName string
+		expectCacheEnvVarVal  string
+	}{
+		{
+			name: "VLLM backend with useAsCompilationCache volume mount",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "vllm-cache",
+							MountPoint:            "/root/.cache/vllm",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			volumeMounts:          []corev1.VolumeMount{},
+			expectCacheEnvVar:     true,
+			expectCacheEnvVarName: "VLLM_CACHE_ROOT",
+			expectCacheEnvVarVal:  "/root/.cache/vllm",
+		},
+		{
+			name: "VLLM backend with useAsCompilationCache at custom mount point",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "custom-cache",
+							MountPoint:            "/custom/cache/path",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			volumeMounts:          []corev1.VolumeMount{},
+			expectCacheEnvVar:     true,
+			expectCacheEnvVarName: "VLLM_CACHE_ROOT",
+			expectCacheEnvVarVal:  "/custom/cache/path",
+		},
+		{
+			name: "VLLM backend without useAsCompilationCache",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:       "regular-volume",
+							MountPoint: "/data",
+						},
+					},
+				},
+			},
+			volumeMounts:      []corev1.VolumeMount{},
+			expectCacheEnvVar: false,
+		},
+		{
+			name: "VLLM backend with no volume mounts",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					VolumeMounts: nil,
+				},
+			},
+			volumeMounts:      []corev1.VolumeMount{},
+			expectCacheEnvVar: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := gomega.NewGomegaWithT(t)
+
+			// Start from a container with no env vars and the case's initial volume mounts
+			container := &corev1.Container{
+				Env:          []corev1.EnvVar{},
+				VolumeMounts: tt.volumeMounts,
+			}
+
+			// Run the backend hook under test (one node, main role, Grove deployer); it mutates container in place
+			backend.UpdateContainer(container, 1, RoleMain, tt.component, "test-service", &GroveMultinodeDeployer{})
+
+			if tt.expectCacheEnvVar {
+				// The expected env var must be present; only its first occurrence is value-checked
+				found := false
+				for _, env := range container.Env {
+					if env.Name == tt.expectCacheEnvVarName {
+						found = true
+						g.Expect(env.Value).To(gomega.Equal(tt.expectCacheEnvVarVal))
+						break
+					}
+				}
+				if !found {
+					t.Errorf("Expected environment variable %s not found in container", tt.expectCacheEnvVarName)
+				}
+			} else {
+				// VLLM_CACHE_ROOT must not be injected when no mount is flagged as compilation cache
+				for _, env := range container.Env {
+					if env.Name == "VLLM_CACHE_ROOT" {
+						t.Errorf("Unexpected environment variable VLLM_CACHE_ROOT found: %s", env.Value)
+					}
+				}
+			}
+		})
+	}
+}
+
 func TestUpdateVLLMMultinodeArgs(t *testing.T) {
 	tests := []struct {
 		name              string

--- a/deploy/cloud/operator/internal/dynamo/graph.go
+++ b/deploy/cloud/operator/internal/dynamo/graph.go
@@ -781,18 +781,37 @@ func GenerateBasePodSpec(
 	addStandardEnvVars(&container, controllerConfig)

 	var volumes []corev1.Volume
-	if component.PVC != nil {
+
+	for _, volumeMount := range component.VolumeMounts {
+		if volumeMount.Name == "" {
+			return nil, fmt.Errorf("volumeMount.name is required when volumeMounts is set")
+		}
+
+		// Determine mount point
+		mountPoint := volumeMount.MountPoint
+		if volumeMount.UseAsCompilationCache && mountPoint == "" {
+			// Use backend-specific default for compilation cache
+			defaultMountPoint := getDefaultCompilationCacheMountPoint(backendFramework)
+			if defaultMountPoint == "" {
+				return nil, fmt.Errorf("volumeMount with useAsCompilationCache=true requires an explicit mountPoint for backend framework %s (no default available)", backendFramework)
+			}
+			mountPoint = defaultMountPoint
+		} else if !volumeMount.UseAsCompilationCache && mountPoint == "" {
+			return nil, fmt.Errorf("volumeMount.mountPoint is required when useAsCompilationCache is false")
+		}
+
 		volumes = append(volumes, corev1.Volume{
-			Name: *component.PVC.Name,
+			Name: volumeMount.Name,
 			VolumeSource: corev1.VolumeSource{
 				PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
-					ClaimName: *component.PVC.Name,
+					ClaimName: volumeMount.Name,
 				},
 			},
 		})
+
 		container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
-			Name:      *component.PVC.Name,
-			MountPath: *component.PVC.MountPoint,
+			Name:      volumeMount.Name,
+			MountPath: mountPoint,
 		})
 	}
 	if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
@@ -1154,8 +1173,8 @@ func ConvertDynamoComponentDeploymentToSpec(dynComponent *v1alpha1.DynamoCompone
 	}
 }

-// getBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment
-func getBackendFrameworkFromDynamoComponent(dynComponent *v1alpha1.DynamoComponentDeployment) (BackendFramework, error) {
+// GetBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment
+func GetBackendFrameworkFromDynamoComponent(dynComponent *v1alpha1.DynamoComponentDeployment) (BackendFramework, error) {
 	// Extract command/args from component
 	var command, args []string
 	if dynComponent.Spec.ExtraPodSpec != nil && dynComponent.Spec.ExtraPodSpec.MainContainer != nil {
@@ -1189,7 +1208,7 @@ func GenerateBasePodSpecForController(
 	numberOfNodes := componentSpec.GetNumberOfNodes()

 	// Determine backend framework using hybrid approach
-	backendFramework, err := getBackendFrameworkFromDynamoComponent(dynComponent)
+	backendFramework, err := GetBackendFrameworkFromDynamoComponent(dynComponent)
 	if err != nil {
 		return nil, fmt.Errorf("failed to determine backend framework: %w", err)
 	}
@@ -1216,6 +1235,21 @@ func GenerateBasePodSpecForController(
 	return podSpec, nil
 }

+// getDefaultCompilationCacheMountPoint returns the backend's default compilation cache mount point, or "" when the backend has no default
+func getDefaultCompilationCacheMountPoint(backendFramework BackendFramework) string {
+	switch backendFramework {
+	case BackendFrameworkVLLM:
+		return commonconsts.DefaultVLLMCacheMountPoint
+	case BackendFrameworkSGLang, BackendFrameworkTRTLLM:
+		// SGLang and TensorRT-LLM don't currently support compilation caches,
+		// so no default exists; the empty string tells the caller there is nothing to fall back to
+		return ""
+	default:
+		// Unknown backends get no default either: compilation cache support is not assumed
+		return ""
+	}
+}
+
 func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
 	// default: enabled=true, size=8Gi
 	size := resource.MustParse(commonconsts.DefaultSharedMemorySize)

--- a/deploy/cloud/operator/internal/dynamo/graph_test.go
+++ b/deploy/cloud/operator/internal/dynamo/graph_test.go
@@ -1200,9 +1200,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 											Value: "2",
 										},
 									},
-									PVC: &v1alpha1.PVC{
-										Name:       &[]string{"planner-pvc"}[0],
-										MountPoint: &[]string{"/planner"}[0],
+									VolumeMounts: []v1alpha1.VolumeMount{
+										{
+											Name:       "planner-pvc",
+											MountPoint: "/planner",
+										},
 									},
 									EnvFromSecret: &[]string{"planner-secret"}[0],
 									LivenessProbe: &corev1.Probe{
@@ -1719,9 +1721,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 											Value: "2",
 										},
 									},
-									PVC: &v1alpha1.PVC{
-										Name:       &[]string{"planner-pvc"}[0],
-										MountPoint: &[]string{"/planner"}[0],
+									VolumeMounts: []v1alpha1.VolumeMount{
+										{
+											Name:       "planner-pvc",
+											MountPoint: "/planner",
+										},
 									},
 									EnvFromSecret: &[]string{"planner-secret"}[0],
 									LivenessProbe: &corev1.Probe{
@@ -2517,9 +2521,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
 											Value: "2",
 										},
 									},
-									PVC: &v1alpha1.PVC{
-										Name:       &[]string{"planner-pvc"}[0],
-										MountPoint: &[]string{"/planner"}[0],
+									VolumeMounts: []v1alpha1.VolumeMount{
+										{
+											Name:       "planner-pvc",
+											MountPoint: "/planner",
+										},
 									},
 									EnvFromSecret: &[]string{"planner-secret"}[0],
 									LivenessProbe: &corev1.Probe{
@@ -4760,3 +4766,344 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
 		})
 	}
 }
+
+func TestGenerateBasePodSpec_VolumeMounts(t *testing.T) { // table-driven check that volumeMounts become PVC volumes plus container mounts, and that incomplete entries are rejected
+	secretsRetriever := &mockSecretsRetriever{}
+	controllerConfig := controller_common.Config{}
+
+	tests := []struct {
+		name           string
+		component      *v1alpha1.DynamoComponentDeploymentOverridesSpec
+		expectError    bool
+		expectedPVCs   []string
+		expectedMounts []corev1.VolumeMount
+	}{
+		{
+			name: "valid volumeMounts",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:       "test-pvc",
+							MountPoint: "/data",
+						},
+					},
+				},
+			},
+			expectError:  false,
+			expectedPVCs: []string{"test-pvc"},
+			expectedMounts: []corev1.VolumeMount{
+				{Name: "test-pvc", MountPath: "/data"},
+				{Name: "shared-memory", MountPath: "/dev/shm"},
+			},
+		},
+		{
+			name: "multiple volumeMounts",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{Name: "pvc1", MountPoint: "/data1"},
+						{Name: "pvc2", MountPoint: "/data2"},
+					},
+				},
+			},
+			expectError:  false,
+			expectedPVCs: []string{"pvc1", "pvc2"},
+			expectedMounts: []corev1.VolumeMount{
+				{Name: "pvc1", MountPath: "/data1"},
+				{Name: "pvc2", MountPath: "/data2"},
+				{Name: "shared-memory", MountPath: "/dev/shm"},
+			},
+		},
+		{
+			name: "empty volumeMount name",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{Name: "", MountPoint: "/data"},
+					},
+				},
+			},
+			expectError: true,
+		},
+		{
+			name: "empty volumeMount mountPoint",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{Name: "test-pvc", MountPoint: ""},
+					},
+				},
+			},
+			expectError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			podSpec, err := GenerateBasePodSpec(
+				tt.component,
+				BackendFrameworkVLLM,
+				secretsRetriever,
+				"test-deployment",
+				"default",
+				RoleMain,
+				1,
+				controllerConfig,
+				commonconsts.MultinodeDeploymentTypeGrove,
+				"test-service",
+			)
+
+			if tt.expectError {
+				if err == nil {
+					t.Errorf("GenerateBasePodSpec() expected error, got nil")
+				}
+				return
+			}
+
+			if err != nil {
+				t.Errorf("GenerateBasePodSpec() unexpected error: %v", err)
+				return
+			}
+
+			// Each expected PVC must appear as a pod volume whose name and claim name both equal the PVC name
+			for _, expectedPVC := range tt.expectedPVCs {
+				found := false
+				for _, volume := range podSpec.Volumes {
+					if volume.Name == expectedPVC && volume.PersistentVolumeClaim != nil {
+						if volume.PersistentVolumeClaim.ClaimName == expectedPVC {
+							found = true
+							break
+						}
+					}
+				}
+				if !found {
+					t.Errorf("GenerateBasePodSpec() expected PVC volume %s not found", expectedPVC)
+				}
+			}
+
+			// Each expected mount must be present on the first container; extra mounts are tolerated
+			if len(podSpec.Containers) == 0 {
+				t.Errorf("GenerateBasePodSpec() no containers found")
+				return
+			}
+
+			container := podSpec.Containers[0]
+			for _, expectedMount := range tt.expectedMounts {
+				found := false
+				for _, mount := range container.VolumeMounts {
+					if mount.Name == expectedMount.Name && mount.MountPath == expectedMount.MountPath {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Errorf("GenerateBasePodSpec() expected volume mount %+v not found", expectedMount)
+				}
+			}
+		})
+	}
+}
+
+func TestGenerateBasePodSpec_UseAsCompilationCache_BackendSupport(t *testing.T) { // table-driven check of compilation-cache mount point resolution per backend framework
+	secretsRetriever := &mockSecretsRetriever{}
+	controllerConfig := controller_common.Config{}
+
+	tests := []struct {
+		name             string
+		component        *v1alpha1.DynamoComponentDeploymentOverridesSpec
+		backendFramework BackendFramework
+		expectError      bool
+		expectedMount    *corev1.VolumeMount
+	}{
+		{
+			name: "useAsCompilationCache with custom mount point",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "cache-pvc",
+							MountPoint:            "/custom/cache",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkVLLM,
+			expectError:      false,
+			expectedMount:    &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/cache"},
+		},
+		{
+			name: "useAsCompilationCache with default mount point for VLLM",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "cache-pvc",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkVLLM,
+			expectError:      false,
+			expectedMount:    &corev1.VolumeMount{Name: "cache-pvc", MountPath: commonconsts.DefaultVLLMCacheMountPoint},
+		},
+		{
+			name: "useAsCompilationCache without mount point for SGLang - should error",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "cache-pvc",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkSGLang,
+			expectError:      true, // no default cache mount point exists for SGLang, so an explicit mountPoint is required
+			expectedMount:    nil,
+		},
+		{
+			name: "useAsCompilationCache with explicit mount point for SGLang - should work",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "cache-pvc",
+							MountPoint:            "/custom/sglang/cache",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkSGLang,
+			expectError:      false,
+			expectedMount:    &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/sglang/cache"},
+		},
+		{
+			name: "useAsCompilationCache without mount point for TensorRT-LLM - should error",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "cache-pvc",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkTRTLLM,
+			expectError:      true, // no default cache mount point exists for TensorRT-LLM, so an explicit mountPoint is required
+			expectedMount:    nil,
+		},
+		{
+			name: "useAsCompilationCache with explicit mount point for TensorRT-LLM - should work",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:                  "cache-pvc",
+							MountPoint:            "/custom/trtllm/cache",
+							UseAsCompilationCache: true,
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkTRTLLM,
+			expectError:      false,
+			expectedMount:    &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/trtllm/cache"},
+		},
+		{
+			name: "no useAsCompilationCache volumes - should be ignored",
+			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
+				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
+					ComponentType: commonconsts.ComponentTypeFrontend,
+					VolumeMounts: []v1alpha1.VolumeMount{
+						{
+							Name:       "regular-pvc",
+							MountPoint: "/data",
+						},
+					},
+				},
+			},
+			backendFramework: BackendFrameworkVLLM,
+			expectError:      false,
+			expectedMount:    nil, // nil skips the volume/mount checks below: this case only asserts that no error is returned
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			podSpec, err := GenerateBasePodSpec(
+				tt.component,
+				tt.backendFramework,
+				secretsRetriever,
+				"test-deployment",
+				"default",
+				RoleMain,
+				1,
+				controllerConfig,
+				commonconsts.MultinodeDeploymentTypeGrove,
+				"test-service",
+			)
+
+			if tt.expectError {
+				if err == nil {
+					t.Errorf("GenerateBasePodSpec() expected error, got nil")
+				}
+				return
+			}
+
+			if err != nil {
+				t.Errorf("GenerateBasePodSpec() unexpected error: %v", err)
+				return
+			}
+
+			if tt.expectedMount != nil {
+				// A pod volume named after the mount must exist, backed by a PVC whose claim name equals that same name
+				found := false
+				for _, volume := range podSpec.Volumes {
+					if volume.Name == tt.expectedMount.Name && volume.PersistentVolumeClaim != nil {
+						if volume.PersistentVolumeClaim.ClaimName == tt.expectedMount.Name {
+							found = true
+							break
+						}
+					}
+				}
+				if !found {
+					t.Errorf("GenerateBasePodSpec() expected PVC volume %s not found", tt.expectedMount.Name)
+				}
+
+				// The first container must mount that volume at the expected (explicit or defaulted) path
+				if len(podSpec.Containers) == 0 {
+					t.Errorf("GenerateBasePodSpec() no containers found")
+					return
+				}
+
+				container := podSpec.Containers[0]
+				found = false
+				for _, mount := range container.VolumeMounts {
+					if mount.Name == tt.expectedMount.Name && mount.MountPath == tt.expectedMount.MountPath {
+						found = true
+						break
+					}
+				}
+				if !found {
+					t.Errorf("GenerateBasePodSpec() expected volume mount %+v not found", tt.expectedMount)
+				}
+			}
+		})
+	}
+}
--- a/docs/kubernetes/fluxcd.md
+++ b/docs/kubernetes/fluxcd.md
@@ -30,6 +30,9 @@ kind: DynamoGraphDeployment
 metadata:
  name: llm-agg
 spec:
+  pvcs:
+    - name: vllm-model-storage
+      size: 100Gi
  services:
    Frontend:
      replicas: 1
@@ -47,10 +50,9 @@ spec:
      - name: SPECIFIC_ENV_VAR
        value: some_specific_value
      # Add PVC for model storage
-      pvc:
-        name: vllm-model-storage
-        mountPath: /models
-        size: 100Gi
+      volumeMounts:
+        - name: vllm-model-storage
+          mountPoint: /models
 ```

 Commit and push this file to your Git repository. FluxCD will detect the new CR and create the initial Dynamo deployment in your cluster.

--- a/docs/kubernetes/installation_guide.md
+++ b/docs/kubernetes/installation_guide.md
@@ -151,9 +151,9 @@ helm upgrade --install dynamo-crds ./crds/ --namespace default
 helm dep build ./platform/
 helm install dynamo-platform ./platform/ \
  --namespace ${NAMESPACE} \
-  --set dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator \
-  --set dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG} \
-  --set dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret
+  --set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \
+  --set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \
+  --set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret"
 ```

 → [Verify Installation](#verify-installation)

--- a/docs/kubernetes/model_caching_with_fluid.md
+++ b/docs/kubernetes/model_caching_with_fluid.md
@@ -178,6 +178,8 @@ kind: DynamoGraphDeployment
 metadata:
  name: model-caching
 spec:
+  pvcs:
+    - name: s3-model
  envs:
    - name: HF_HOME
      value: /model
@@ -185,13 +187,13 @@ spec:
      value: '{"Common": {"model": "/model", ...}}'
  services:
    VllmWorker:
-      pvc:
-        name: s3-model
-        mountPoint: /model
+      volumeMounts:
+        - name: s3-model
+          mountPoint: /model
    Processor:
-      pvc:
-        name: s3-model
-        mountPoint: /model
+      volumeMounts:
+        - name: s3-model
+          mountPoint: /model
 ```


@@ -286,15 +288,17 @@ spec:
      16384, "enable-prefix-caching": true, "ServiceArgs": {"workers": 1, "resources":
      {"gpu": "4", "memory": "40Gi"}}, "common-configs": ["model", "block-size", "max-model-len"]},
      "Planner": {"environment": "kubernetes", "no-operation": true}}'
+  pvcs:
+    - name: llama-3-3-70b-instruct-model
  services:
    Processor:
-      pvc:
-        mountPoint: /model
-        name: llama-3-3-70b-instruct-model
+      volumeMounts:
+        - name: llama-3-3-70b-instruct-model
+          mountPoint: /model
    VllmWorker:
-      pvc:
-        mountPoint: /model
-        name: llama-3-3-70b-instruct-model
+      volumeMounts:
+        - name: llama-3-3-70b-instruct-model
+          mountPoint: /model
      extraPodSpec:
        affinity:
          nodeAffinity:

--- a/recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml
+++ b/recipes/deepseek-r1/sglang-wideep/tep16p-dep16d-disagg.yaml
@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
 metadata:
  name: sgl-dsr1-16gpu
 spec:
+  pvcs:
+    - name: model-cache
+      create: false
  services:
    Frontend:
      dynamoNamespace: sgl-dsr1-16gpu
@@ -30,10 +33,9 @@ spec:
      resources:
        limits:
          gpu: "8"
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:
@@ -74,10 +76,9 @@ spec:
      resources:
        limits:
          gpu: "8"
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:

--- a/recipes/deepseek-r1/sglang-wideep/tep8p-dep8d-disagg.yaml
+++ b/recipes/deepseek-r1/sglang-wideep/tep8p-dep8d-disagg.yaml
@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
 metadata:
  name: sgl-dsr1-8gpu
 spec:
+  pvcs:
+    - name: model-cache
+      create: false
  services:
    Frontend:
      dynamoNamespace: sgl-dsr1-8gpu
@@ -28,10 +31,9 @@ spec:
      resources:
        limits:
          gpu: "8"
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:
@@ -69,10 +71,9 @@ spec:
      resources:
        limits:
          gpu: "8"
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:

--- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
+++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml
@@ -6,15 +6,17 @@ metadata:
  name: gpt-oss-agg-shm
 spec:
  backendFramework: trtllm
+  pvcs:
+    - name: model-cache-oss-gpt120b
+      create: false
  services:
    TrtllmWorker:
      componentType: main
      dynamoNamespace: gpt-oss-agg-shm
      envFromSecret: hf-token-secret
-      pvc:
-        create: false
-        name: model-cache-oss-gpt120b
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache-oss-gpt120b
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:

--- a/recipes/llama-3-70b/vllm/agg/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/agg/deploy.yaml
@@ -6,14 +6,16 @@ metadata:
  name: llama3-70b-agg
 spec:
  backendFramework: vllm
+  pvcs:
+    - name: model-cache
+      create: false
  services:
    Frontend:
      componentType: frontend
      dynamoNamespace: llama3-70b-agg
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
@@ -23,10 +25,9 @@ spec:
      componentType: worker
      dynamoNamespace: llama3-70b-agg
      envFromSecret: hf-token-secret
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 20Gi
      extraPodSpec:

--- a/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-multi-node/deploy.yaml
@@ -6,14 +6,16 @@ metadata:
  name: llama3-70b-disagg-mn
 spec:
  backendFramework: vllm
+  pvcs:
+    - name: model-cache
+      create: false
  services:
    Frontend:
      componentType: frontend
      dynamoNamespace: llama3-70b-disagg-mn
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
@@ -23,10 +25,9 @@ spec:
      componentType: worker
      dynamoNamespace: llama3-70b-disagg-mn
      envFromSecret: hf-token-secret
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:
@@ -48,10 +49,9 @@ spec:
      componentType: worker
      dynamoNamespace: llama3-70b-disagg-mn
      envFromSecret: hf-token-secret
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:

--- a/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
+++ b/recipes/llama-3-70b/vllm/disagg-single-node/deploy.yaml
@@ -6,14 +6,16 @@ metadata:
  name: llama3-70b-disagg-sn
 spec:
  backendFramework: vllm
+  pvcs:
+    - name: model-cache
+      create: false
  services:
    Frontend:
      componentType: frontend
      dynamoNamespace: llama3-70b-disagg-sn
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
@@ -23,10 +25,9 @@ spec:
      componentType: worker
      dynamoNamespace: llama3-70b-disagg-sn
      envFromSecret: hf-token-secret
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec:
@@ -58,10 +59,9 @@ spec:
      componentType: worker
      dynamoNamespace: llama3-70b-disagg-sn
      envFromSecret: hf-token-secret
-      pvc:
-        create: false
-        name: model-cache
-        mountPoint: /root/.cache/huggingface
+      volumeMounts:
+        - name: model-cache
+          mountPoint: /root/.cache/huggingface
      sharedMemory:
        size: 80Gi
      extraPodSpec: