Unverified Commit fcc42624 authored by Michael Shin's avatar Michael Shin Committed by GitHub
Browse files

feat: Add vLLM compilation cache to Dynamo Operator (#3257)


Signed-off-by: default avatarMichael Shin <michaelshin@users.noreply.github.com>
parent 02c822d6
...@@ -6,6 +6,7 @@ import ( ...@@ -6,6 +6,7 @@ import (
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
) )
const ( const (
...@@ -28,6 +29,35 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes ...@@ -28,6 +29,35 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
container.StartupProbe = nil container.StartupProbe = nil
} }
} }
// Set compilation cache environment variables for VLLM
cacheDir := ""
// Check for volumeMounts with useAsCompilationCache=true
for _, volumeMount := range component.VolumeMounts {
if volumeMount.UseAsCompilationCache {
cacheDir = volumeMount.MountPoint
break
}
}
if cacheDir != "" {
// Set VLLM cache directory using the environment variable
container.Env = append(container.Env, corev1.EnvVar{
Name: "VLLM_CACHE_ROOT",
Value: cacheDir,
})
// Log confirmation that compilation cache is configured for VLLM
logger := log.Log.WithName("vllm-backend")
logger.Info("Compilation cache configured and enabled for VLLM backend",
"backend", "vllm",
"status", "fully-supported",
"cache-dir", cacheDir,
"use-as-compilation-cache", true,
"env-vars-set", true,
"env-vars", "VLLM_CACHE_ROOT")
}
} }
func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) { func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) {
......
...@@ -126,6 +126,118 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -126,6 +126,118 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
} }
} }
func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
backend := &VLLMBackend{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
volumeMounts []corev1.VolumeMount
expectCacheEnvVar bool
expectCacheEnvVarName string
expectCacheEnvVarVal string
}{
{
name: "VLLM backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "vllm-cache",
MountPoint: "/root/.cache/vllm",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: true,
expectCacheEnvVarName: "VLLM_CACHE_ROOT",
expectCacheEnvVarVal: "/root/.cache/vllm",
},
{
name: "VLLM backend with useAsCompilationCache at custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "custom-cache",
MountPoint: "/custom/cache/path",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: true,
expectCacheEnvVarName: "VLLM_CACHE_ROOT",
expectCacheEnvVarVal: "/custom/cache/path",
},
{
name: "VLLM backend without useAsCompilationCache",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "regular-volume",
MountPoint: "/data",
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: false,
},
{
name: "VLLM backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: nil,
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
// Create a container with initial state including volume mounts
container := &corev1.Container{
Env: []corev1.EnvVar{},
VolumeMounts: tt.volumeMounts,
}
// Call UpdateContainer
backend.UpdateContainer(container, 1, RoleMain, tt.component, "test-service", &GroveMultinodeDeployer{})
if tt.expectCacheEnvVar {
// Check that the VLLM_CACHE_ROOT environment variable is set
found := false
for _, env := range container.Env {
if env.Name == tt.expectCacheEnvVarName {
found = true
g.Expect(env.Value).To(gomega.Equal(tt.expectCacheEnvVarVal))
break
}
}
if !found {
t.Errorf("Expected environment variable %s not found in container", tt.expectCacheEnvVarName)
}
} else {
// Check that no cache environment variable is set
for _, env := range container.Env {
if env.Name == "VLLM_CACHE_ROOT" {
t.Errorf("Unexpected environment variable VLLM_CACHE_ROOT found: %s", env.Value)
}
}
}
})
}
}
func TestUpdateVLLMMultinodeArgs(t *testing.T) { func TestUpdateVLLMMultinodeArgs(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
......
...@@ -781,18 +781,37 @@ func GenerateBasePodSpec( ...@@ -781,18 +781,37 @@ func GenerateBasePodSpec(
addStandardEnvVars(&container, controllerConfig) addStandardEnvVars(&container, controllerConfig)
var volumes []corev1.Volume var volumes []corev1.Volume
if component.PVC != nil {
for _, volumeMount := range component.VolumeMounts {
if volumeMount.Name == "" {
return nil, fmt.Errorf("volumeMount.name is required when volumeMounts is set")
}
// Determine mount point
mountPoint := volumeMount.MountPoint
if volumeMount.UseAsCompilationCache && mountPoint == "" {
// Use backend-specific default for compilation cache
defaultMountPoint := getDefaultCompilationCacheMountPoint(backendFramework)
if defaultMountPoint == "" {
return nil, fmt.Errorf("volumeMount with useAsCompilationCache=true requires an explicit mountPoint for backend framework %s (no default available)", backendFramework)
}
mountPoint = defaultMountPoint
} else if !volumeMount.UseAsCompilationCache && mountPoint == "" {
return nil, fmt.Errorf("volumeMount.mountPoint is required when useAsCompilationCache is false")
}
volumes = append(volumes, corev1.Volume{ volumes = append(volumes, corev1.Volume{
Name: *component.PVC.Name, Name: volumeMount.Name,
VolumeSource: corev1.VolumeSource{ VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: *component.PVC.Name, ClaimName: volumeMount.Name,
}, },
}, },
}) })
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: *component.PVC.Name, Name: volumeMount.Name,
MountPath: *component.PVC.MountPoint, MountPath: mountPoint,
}) })
} }
if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil { if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
...@@ -1154,8 +1173,8 @@ func ConvertDynamoComponentDeploymentToSpec(dynComponent *v1alpha1.DynamoCompone ...@@ -1154,8 +1173,8 @@ func ConvertDynamoComponentDeploymentToSpec(dynComponent *v1alpha1.DynamoCompone
} }
} }
// getBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment // GetBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment
func getBackendFrameworkFromDynamoComponent(dynComponent *v1alpha1.DynamoComponentDeployment) (BackendFramework, error) { func GetBackendFrameworkFromDynamoComponent(dynComponent *v1alpha1.DynamoComponentDeployment) (BackendFramework, error) {
// Extract command/args from component // Extract command/args from component
var command, args []string var command, args []string
if dynComponent.Spec.ExtraPodSpec != nil && dynComponent.Spec.ExtraPodSpec.MainContainer != nil { if dynComponent.Spec.ExtraPodSpec != nil && dynComponent.Spec.ExtraPodSpec.MainContainer != nil {
...@@ -1189,7 +1208,7 @@ func GenerateBasePodSpecForController( ...@@ -1189,7 +1208,7 @@ func GenerateBasePodSpecForController(
numberOfNodes := componentSpec.GetNumberOfNodes() numberOfNodes := componentSpec.GetNumberOfNodes()
// Determine backend framework using hybrid approach // Determine backend framework using hybrid approach
backendFramework, err := getBackendFrameworkFromDynamoComponent(dynComponent) backendFramework, err := GetBackendFrameworkFromDynamoComponent(dynComponent)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to determine backend framework: %w", err) return nil, fmt.Errorf("failed to determine backend framework: %w", err)
} }
...@@ -1216,6 +1235,21 @@ func GenerateBasePodSpecForController( ...@@ -1216,6 +1235,21 @@ func GenerateBasePodSpecForController(
return podSpec, nil return podSpec, nil
} }
// getDefaultCompilationCacheMountPoint returns the default compilation-cache
// mount point for the given backend framework.
//
// Only vLLM has a default (commonconsts.DefaultVLLMCacheMountPoint). Every
// other value — SGLang and TensorRT-LLM, which don't currently support
// compilation caches, as well as unknown backends — yields an empty string,
// meaning no default is available.
func getDefaultCompilationCacheMountPoint(backendFramework BackendFramework) string {
	if backendFramework == BackendFrameworkVLLM {
		return commonconsts.DefaultVLLMCacheMountPoint
	}
	// No default: do not assume compilation cache support for this backend.
	return ""
}
func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) { func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
// default: enabled=true, size=8Gi // default: enabled=true, size=8Gi
size := resource.MustParse(commonconsts.DefaultSharedMemorySize) size := resource.MustParse(commonconsts.DefaultSharedMemorySize)
......
...@@ -1200,9 +1200,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1200,9 +1200,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Value: "2", Value: "2",
}, },
}, },
PVC: &v1alpha1.PVC{ VolumeMounts: []v1alpha1.VolumeMount{
Name: &[]string{"planner-pvc"}[0], {
MountPoint: &[]string{"/planner"}[0], Name: "planner-pvc",
MountPoint: "/planner",
},
}, },
EnvFromSecret: &[]string{"planner-secret"}[0], EnvFromSecret: &[]string{"planner-secret"}[0],
LivenessProbe: &corev1.Probe{ LivenessProbe: &corev1.Probe{
...@@ -1719,9 +1721,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -1719,9 +1721,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Value: "2", Value: "2",
}, },
}, },
PVC: &v1alpha1.PVC{ VolumeMounts: []v1alpha1.VolumeMount{
Name: &[]string{"planner-pvc"}[0], {
MountPoint: &[]string{"/planner"}[0], Name: "planner-pvc",
MountPoint: "/planner",
},
}, },
EnvFromSecret: &[]string{"planner-secret"}[0], EnvFromSecret: &[]string{"planner-secret"}[0],
LivenessProbe: &corev1.Probe{ LivenessProbe: &corev1.Probe{
...@@ -2517,9 +2521,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) { ...@@ -2517,9 +2521,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Value: "2", Value: "2",
}, },
}, },
PVC: &v1alpha1.PVC{ VolumeMounts: []v1alpha1.VolumeMount{
Name: &[]string{"planner-pvc"}[0], {
MountPoint: &[]string{"/planner"}[0], Name: "planner-pvc",
MountPoint: "/planner",
},
}, },
EnvFromSecret: &[]string{"planner-secret"}[0], EnvFromSecret: &[]string{"planner-secret"}[0],
LivenessProbe: &corev1.Probe{ LivenessProbe: &corev1.Probe{
...@@ -4760,3 +4766,344 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) { ...@@ -4760,3 +4766,344 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
}) })
} }
} }
func TestGenerateBasePodSpec_VolumeMounts(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
expectError bool
expectedPVCs []string
expectedMounts []corev1.VolumeMount
}{
{
name: "valid volumeMounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "test-pvc",
MountPoint: "/data",
},
},
},
},
expectError: false,
expectedPVCs: []string{"test-pvc"},
expectedMounts: []corev1.VolumeMount{
{Name: "test-pvc", MountPath: "/data"},
{Name: "shared-memory", MountPath: "/dev/shm"},
},
},
{
name: "multiple volumeMounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{Name: "pvc1", MountPoint: "/data1"},
{Name: "pvc2", MountPoint: "/data2"},
},
},
},
expectError: false,
expectedPVCs: []string{"pvc1", "pvc2"},
expectedMounts: []corev1.VolumeMount{
{Name: "pvc1", MountPath: "/data1"},
{Name: "pvc2", MountPath: "/data2"},
{Name: "shared-memory", MountPath: "/dev/shm"},
},
},
{
name: "empty volumeMount name",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{Name: "", MountPoint: "/data"},
},
},
},
expectError: true,
},
{
name: "empty volumeMount mountPoint",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{Name: "test-pvc", MountPoint: ""},
},
},
},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
podSpec, err := GenerateBasePodSpec(
tt.component,
BackendFrameworkVLLM,
secretsRetriever,
"test-deployment",
"default",
RoleMain,
1,
controllerConfig,
commonconsts.MultinodeDeploymentTypeGrove,
"test-service",
)
if tt.expectError {
if err == nil {
t.Errorf("GenerateBasePodSpec() expected error, got nil")
}
return
}
if err != nil {
t.Errorf("GenerateBasePodSpec() unexpected error: %v", err)
return
}
// Check expected PVCs are present in volumes
for _, expectedPVC := range tt.expectedPVCs {
found := false
for _, volume := range podSpec.Volumes {
if volume.Name == expectedPVC && volume.PersistentVolumeClaim != nil {
if volume.PersistentVolumeClaim.ClaimName == expectedPVC {
found = true
break
}
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected PVC volume %s not found", expectedPVC)
}
}
// Check expected mounts are present
if len(podSpec.Containers) == 0 {
t.Errorf("GenerateBasePodSpec() no containers found")
return
}
container := podSpec.Containers[0]
for _, expectedMount := range tt.expectedMounts {
found := false
for _, mount := range container.VolumeMounts {
if mount.Name == expectedMount.Name && mount.MountPath == expectedMount.MountPath {
found = true
break
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected volume mount %+v not found", expectedMount)
}
}
})
}
}
func TestGenerateBasePodSpec_UseAsCompilationCache_BackendSupport(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
backendFramework BackendFramework
expectError bool
expectedMount *corev1.VolumeMount
}{
{
name: "useAsCompilationCache with custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
MountPoint: "/custom/cache",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkVLLM,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/cache"},
},
{
name: "useAsCompilationCache with default mount point for VLLM",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkVLLM,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: commonconsts.DefaultVLLMCacheMountPoint},
},
{
name: "useAsCompilationCache without mount point for SGLang - should error",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkSGLang,
expectError: true, // SGLang doesn't support compilation cache, requires explicit mount point
expectedMount: nil,
},
{
name: "useAsCompilationCache with explicit mount point for SGLang - should work",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
MountPoint: "/custom/sglang/cache",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkSGLang,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/sglang/cache"},
},
{
name: "useAsCompilationCache without mount point for TensorRT-LLM - should error",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkTRTLLM,
expectError: true, // TensorRT-LLM doesn't support compilation cache, requires explicit mount point
expectedMount: nil,
},
{
name: "useAsCompilationCache with explicit mount point for TensorRT-LLM - should work",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
MountPoint: "/custom/trtllm/cache",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkTRTLLM,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/trtllm/cache"},
},
{
name: "no useAsCompilationCache volumes - should be ignored",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "regular-pvc",
MountPoint: "/data",
},
},
},
},
backendFramework: BackendFrameworkVLLM,
expectError: false,
expectedMount: nil, // Should be ignored, not error
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
podSpec, err := GenerateBasePodSpec(
tt.component,
tt.backendFramework,
secretsRetriever,
"test-deployment",
"default",
RoleMain,
1,
controllerConfig,
commonconsts.MultinodeDeploymentTypeGrove,
"test-service",
)
if tt.expectError {
if err == nil {
t.Errorf("GenerateBasePodSpec() expected error, got nil")
}
return
}
if err != nil {
t.Errorf("GenerateBasePodSpec() unexpected error: %v", err)
return
}
if tt.expectedMount != nil {
// Check PVC volume exists
found := false
for _, volume := range podSpec.Volumes {
if volume.Name == tt.expectedMount.Name && volume.PersistentVolumeClaim != nil {
if volume.PersistentVolumeClaim.ClaimName == tt.expectedMount.Name {
found = true
break
}
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected PVC volume %s not found", tt.expectedMount.Name)
}
// Check volume mount exists
if len(podSpec.Containers) == 0 {
t.Errorf("GenerateBasePodSpec() no containers found")
return
}
container := podSpec.Containers[0]
found = false
for _, mount := range container.VolumeMounts {
if mount.Name == tt.expectedMount.Name && mount.MountPath == tt.expectedMount.MountPath {
found = true
break
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected volume mount %+v not found", tt.expectedMount)
}
}
})
}
}
...@@ -30,6 +30,9 @@ kind: DynamoGraphDeployment ...@@ -30,6 +30,9 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: llm-agg name: llm-agg
spec: spec:
pvcs:
- name: vllm-model-storage
size: 100Gi
services: services:
Frontend: Frontend:
replicas: 1 replicas: 1
...@@ -47,10 +50,9 @@ spec: ...@@ -47,10 +50,9 @@ spec:
- name: SPECIFIC_ENV_VAR - name: SPECIFIC_ENV_VAR
value: some_specific_value value: some_specific_value
# Add PVC for model storage # Add PVC for model storage
pvc: volumeMounts:
name: vllm-model-storage - name: vllm-model-storage
mountPath: /models mountPoint: /models
size: 100Gi
``` ```
Commit and push this file to your Git repository. FluxCD will detect the new CR and create the initial Dynamo deployment in your cluster. Commit and push this file to your Git repository. FluxCD will detect the new CR and create the initial Dynamo deployment in your cluster.
......
...@@ -151,9 +151,9 @@ helm upgrade --install dynamo-crds ./crds/ --namespace default ...@@ -151,9 +151,9 @@ helm upgrade --install dynamo-crds ./crds/ --namespace default
helm dep build ./platform/ helm dep build ./platform/
helm install dynamo-platform ./platform/ \ helm install dynamo-platform ./platform/ \
--namespace ${NAMESPACE} \ --namespace ${NAMESPACE} \
--set dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator \ --set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \
--set dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG} \ --set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \
--set dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret --set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret"
``` ```
[Verify Installation](#verify-installation) [Verify Installation](#verify-installation)
......
...@@ -178,6 +178,8 @@ kind: DynamoGraphDeployment ...@@ -178,6 +178,8 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: model-caching name: model-caching
spec: spec:
pvcs:
- name: s3-model
envs: envs:
- name: HF_HOME - name: HF_HOME
value: /model value: /model
...@@ -185,12 +187,12 @@ spec: ...@@ -185,12 +187,12 @@ spec:
value: '{"Common": {"model": "/model", ...}}' value: '{"Common": {"model": "/model", ...}}'
services: services:
VllmWorker: VllmWorker:
pvc: volumeMounts:
name: s3-model - name: s3-model
mountPoint: /model mountPoint: /model
Processor: Processor:
pvc: volumeMounts:
name: s3-model - name: s3-model
mountPoint: /model mountPoint: /model
``` ```
...@@ -286,15 +288,17 @@ spec: ...@@ -286,15 +288,17 @@ spec:
16384, "enable-prefix-caching": true, "ServiceArgs": {"workers": 1, "resources": 16384, "enable-prefix-caching": true, "ServiceArgs": {"workers": 1, "resources":
{"gpu": "4", "memory": "40Gi"}}, "common-configs": ["model", "block-size", "max-model-len"]}, {"gpu": "4", "memory": "40Gi"}}, "common-configs": ["model", "block-size", "max-model-len"]},
"Planner": {"environment": "kubernetes", "no-operation": true}}' "Planner": {"environment": "kubernetes", "no-operation": true}}'
pvcs:
- name: llama-3-3-70b-instruct-model
services: services:
Processor: Processor:
pvc: volumeMounts:
- name: llama-3-3-70b-instruct-model
mountPoint: /model mountPoint: /model
name: llama-3-3-70b-instruct-model
VllmWorker: VllmWorker:
pvc: volumeMounts:
- name: llama-3-3-70b-instruct-model
mountPoint: /model mountPoint: /model
name: llama-3-3-70b-instruct-model
extraPodSpec: extraPodSpec:
affinity: affinity:
nodeAffinity: nodeAffinity:
......
...@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment ...@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: sgl-dsr1-16gpu name: sgl-dsr1-16gpu
spec: spec:
pvcs:
- name: model-cache
create: false
services: services:
Frontend: Frontend:
dynamoNamespace: sgl-dsr1-16gpu dynamoNamespace: sgl-dsr1-16gpu
...@@ -30,9 +33,8 @@ spec: ...@@ -30,9 +33,8 @@ spec:
resources: resources:
limits: limits:
gpu: "8" gpu: "8"
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
...@@ -74,9 +76,8 @@ spec: ...@@ -74,9 +76,8 @@ spec:
resources: resources:
limits: limits:
gpu: "8" gpu: "8"
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
......
...@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment ...@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
metadata: metadata:
name: sgl-dsr1-8gpu name: sgl-dsr1-8gpu
spec: spec:
pvcs:
- name: model-cache
create: false
services: services:
Frontend: Frontend:
dynamoNamespace: sgl-dsr1-8gpu dynamoNamespace: sgl-dsr1-8gpu
...@@ -28,9 +31,8 @@ spec: ...@@ -28,9 +31,8 @@ spec:
resources: resources:
limits: limits:
gpu: "8" gpu: "8"
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
...@@ -69,9 +71,8 @@ spec: ...@@ -69,9 +71,8 @@ spec:
resources: resources:
limits: limits:
gpu: "8" gpu: "8"
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
......
...@@ -6,14 +6,16 @@ metadata: ...@@ -6,14 +6,16 @@ metadata:
name: gpt-oss-agg-shm name: gpt-oss-agg-shm
spec: spec:
backendFramework: trtllm backendFramework: trtllm
pvcs:
- name: model-cache-oss-gpt120b
create: false
services: services:
TrtllmWorker: TrtllmWorker:
componentType: main componentType: main
dynamoNamespace: gpt-oss-agg-shm dynamoNamespace: gpt-oss-agg-shm
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
pvc: volumeMounts:
create: false - name: model-cache-oss-gpt120b
name: model-cache-oss-gpt120b
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
......
...@@ -6,13 +6,15 @@ metadata: ...@@ -6,13 +6,15 @@ metadata:
name: llama3-70b-agg name: llama3-70b-agg
spec: spec:
backendFramework: vllm backendFramework: vllm
pvcs:
- name: model-cache
create: false
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: llama3-70b-agg dynamoNamespace: llama3-70b-agg
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -23,9 +25,8 @@ spec: ...@@ -23,9 +25,8 @@ spec:
componentType: worker componentType: worker
dynamoNamespace: llama3-70b-agg dynamoNamespace: llama3-70b-agg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 20Gi size: 20Gi
......
...@@ -6,13 +6,15 @@ metadata: ...@@ -6,13 +6,15 @@ metadata:
name: llama3-70b-disagg-mn name: llama3-70b-disagg-mn
spec: spec:
backendFramework: vllm backendFramework: vllm
pvcs:
- name: model-cache
create: false
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: llama3-70b-disagg-mn dynamoNamespace: llama3-70b-disagg-mn
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -23,9 +25,8 @@ spec: ...@@ -23,9 +25,8 @@ spec:
componentType: worker componentType: worker
dynamoNamespace: llama3-70b-disagg-mn dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
...@@ -48,9 +49,8 @@ spec: ...@@ -48,9 +49,8 @@ spec:
componentType: worker componentType: worker
dynamoNamespace: llama3-70b-disagg-mn dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
......
...@@ -6,13 +6,15 @@ metadata: ...@@ -6,13 +6,15 @@ metadata:
name: llama3-70b-disagg-sn name: llama3-70b-disagg-sn
spec: spec:
backendFramework: vllm backendFramework: vllm
pvcs:
- name: model-cache
create: false
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: llama3-70b-disagg-sn dynamoNamespace: llama3-70b-disagg-sn
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
...@@ -23,9 +25,8 @@ spec: ...@@ -23,9 +25,8 @@ spec:
componentType: worker componentType: worker
dynamoNamespace: llama3-70b-disagg-sn dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
...@@ -58,9 +59,8 @@ spec: ...@@ -58,9 +59,8 @@ spec:
componentType: worker componentType: worker
dynamoNamespace: llama3-70b-disagg-sn dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
pvc: volumeMounts:
create: false - name: model-cache
name: model-cache
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
sharedMemory: sharedMemory:
size: 80Gi size: 80Gi
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment