"lib/bindings/vscode:/vscode.git/clone" did not exist on "e0a51940d105175d1105114b814933c7fc5dbd48"
Unverified Commit fcc42624 authored by Michael Shin, committed by GitHub
Browse files

feat: Add vLLM compilation cache to Dynamo Operator (#3257)


Signed-off-by: Michael Shin <michaelshin@users.noreply.github.com>
parent 02c822d6
......@@ -6,6 +6,7 @@ import (
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/log"
)
const (
......@@ -28,6 +29,35 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
container.StartupProbe = nil
}
}
// Set compilation cache environment variables for VLLM
cacheDir := ""
// Check for volumeMounts with useAsCompilationCache=true
for _, volumeMount := range component.VolumeMounts {
if volumeMount.UseAsCompilationCache {
cacheDir = volumeMount.MountPoint
break
}
}
if cacheDir != "" {
// Set VLLM cache directory using the environment variable
container.Env = append(container.Env, corev1.EnvVar{
Name: "VLLM_CACHE_ROOT",
Value: cacheDir,
})
// Log confirmation that compilation cache is configured for VLLM
logger := log.Log.WithName("vllm-backend")
logger.Info("Compilation cache configured and enabled for VLLM backend",
"backend", "vllm",
"status", "fully-supported",
"cache-dir", cacheDir,
"use-as-compilation-cache", true,
"env-vars-set", true,
"env-vars", "VLLM_CACHE_ROOT")
}
}
func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) {
......
......@@ -126,6 +126,118 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
}
}
func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
backend := &VLLMBackend{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
volumeMounts []corev1.VolumeMount
expectCacheEnvVar bool
expectCacheEnvVarName string
expectCacheEnvVarVal string
}{
{
name: "VLLM backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "vllm-cache",
MountPoint: "/root/.cache/vllm",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: true,
expectCacheEnvVarName: "VLLM_CACHE_ROOT",
expectCacheEnvVarVal: "/root/.cache/vllm",
},
{
name: "VLLM backend with useAsCompilationCache at custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "custom-cache",
MountPoint: "/custom/cache/path",
UseAsCompilationCache: true,
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: true,
expectCacheEnvVarName: "VLLM_CACHE_ROOT",
expectCacheEnvVarVal: "/custom/cache/path",
},
{
name: "VLLM backend without useAsCompilationCache",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "regular-volume",
MountPoint: "/data",
},
},
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: false,
},
{
name: "VLLM backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
VolumeMounts: nil,
},
},
volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
// Create a container with initial state including volume mounts
container := &corev1.Container{
Env: []corev1.EnvVar{},
VolumeMounts: tt.volumeMounts,
}
// Call UpdateContainer
backend.UpdateContainer(container, 1, RoleMain, tt.component, "test-service", &GroveMultinodeDeployer{})
if tt.expectCacheEnvVar {
// Check that the VLLM_CACHE_ROOT environment variable is set
found := false
for _, env := range container.Env {
if env.Name == tt.expectCacheEnvVarName {
found = true
g.Expect(env.Value).To(gomega.Equal(tt.expectCacheEnvVarVal))
break
}
}
if !found {
t.Errorf("Expected environment variable %s not found in container", tt.expectCacheEnvVarName)
}
} else {
// Check that no cache environment variable is set
for _, env := range container.Env {
if env.Name == "VLLM_CACHE_ROOT" {
t.Errorf("Unexpected environment variable VLLM_CACHE_ROOT found: %s", env.Value)
}
}
}
})
}
}
func TestUpdateVLLMMultinodeArgs(t *testing.T) {
tests := []struct {
name string
......
......@@ -781,18 +781,37 @@ func GenerateBasePodSpec(
addStandardEnvVars(&container, controllerConfig)
var volumes []corev1.Volume
if component.PVC != nil {
for _, volumeMount := range component.VolumeMounts {
if volumeMount.Name == "" {
return nil, fmt.Errorf("volumeMount.name is required when volumeMounts is set")
}
// Determine mount point
mountPoint := volumeMount.MountPoint
if volumeMount.UseAsCompilationCache && mountPoint == "" {
// Use backend-specific default for compilation cache
defaultMountPoint := getDefaultCompilationCacheMountPoint(backendFramework)
if defaultMountPoint == "" {
return nil, fmt.Errorf("volumeMount with useAsCompilationCache=true requires an explicit mountPoint for backend framework %s (no default available)", backendFramework)
}
mountPoint = defaultMountPoint
} else if !volumeMount.UseAsCompilationCache && mountPoint == "" {
return nil, fmt.Errorf("volumeMount.mountPoint is required when useAsCompilationCache is false")
}
volumes = append(volumes, corev1.Volume{
Name: *component.PVC.Name,
Name: volumeMount.Name,
VolumeSource: corev1.VolumeSource{
PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
ClaimName: *component.PVC.Name,
ClaimName: volumeMount.Name,
},
},
})
container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
Name: *component.PVC.Name,
MountPath: *component.PVC.MountPoint,
Name: volumeMount.Name,
MountPath: mountPoint,
})
}
if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
......@@ -1154,8 +1173,8 @@ func ConvertDynamoComponentDeploymentToSpec(dynComponent *v1alpha1.DynamoCompone
}
}
// getBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment
func getBackendFrameworkFromDynamoComponent(dynComponent *v1alpha1.DynamoComponentDeployment) (BackendFramework, error) {
// GetBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment
func GetBackendFrameworkFromDynamoComponent(dynComponent *v1alpha1.DynamoComponentDeployment) (BackendFramework, error) {
// Extract command/args from component
var command, args []string
if dynComponent.Spec.ExtraPodSpec != nil && dynComponent.Spec.ExtraPodSpec.MainContainer != nil {
......@@ -1189,7 +1208,7 @@ func GenerateBasePodSpecForController(
numberOfNodes := componentSpec.GetNumberOfNodes()
// Determine backend framework using hybrid approach
backendFramework, err := getBackendFrameworkFromDynamoComponent(dynComponent)
backendFramework, err := GetBackendFrameworkFromDynamoComponent(dynComponent)
if err != nil {
return nil, fmt.Errorf("failed to determine backend framework: %w", err)
}
......@@ -1216,6 +1235,21 @@ func GenerateBasePodSpecForController(
return podSpec, nil
}
// getDefaultCompilationCacheMountPoint returns the default compilation-cache
// mount point for the given backend framework. Only vLLM has a default; SGLang,
// TensorRT-LLM and any unrecognized framework yield the empty string, which
// signals that no default is available.
func getDefaultCompilationCacheMountPoint(backendFramework BackendFramework) string {
	if backendFramework == BackendFrameworkVLLM {
		return commonconsts.DefaultVLLMCacheMountPoint
	}
	// No other backend is assumed to support a compilation cache.
	return ""
}
func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
// default: enabled=true, size=8Gi
size := resource.MustParse(commonconsts.DefaultSharedMemorySize)
......
......@@ -1200,9 +1200,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Value: "2",
},
},
PVC: &v1alpha1.PVC{
Name: &[]string{"planner-pvc"}[0],
MountPoint: &[]string{"/planner"}[0],
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "planner-pvc",
MountPoint: "/planner",
},
},
EnvFromSecret: &[]string{"planner-secret"}[0],
LivenessProbe: &corev1.Probe{
......@@ -1719,9 +1721,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Value: "2",
},
},
PVC: &v1alpha1.PVC{
Name: &[]string{"planner-pvc"}[0],
MountPoint: &[]string{"/planner"}[0],
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "planner-pvc",
MountPoint: "/planner",
},
},
EnvFromSecret: &[]string{"planner-secret"}[0],
LivenessProbe: &corev1.Probe{
......@@ -2517,9 +2521,11 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Value: "2",
},
},
PVC: &v1alpha1.PVC{
Name: &[]string{"planner-pvc"}[0],
MountPoint: &[]string{"/planner"}[0],
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "planner-pvc",
MountPoint: "/planner",
},
},
EnvFromSecret: &[]string{"planner-secret"}[0],
LivenessProbe: &corev1.Probe{
......@@ -4760,3 +4766,344 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
})
}
}
func TestGenerateBasePodSpec_VolumeMounts(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
expectError bool
expectedPVCs []string
expectedMounts []corev1.VolumeMount
}{
{
name: "valid volumeMounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "test-pvc",
MountPoint: "/data",
},
},
},
},
expectError: false,
expectedPVCs: []string{"test-pvc"},
expectedMounts: []corev1.VolumeMount{
{Name: "test-pvc", MountPath: "/data"},
{Name: "shared-memory", MountPath: "/dev/shm"},
},
},
{
name: "multiple volumeMounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{Name: "pvc1", MountPoint: "/data1"},
{Name: "pvc2", MountPoint: "/data2"},
},
},
},
expectError: false,
expectedPVCs: []string{"pvc1", "pvc2"},
expectedMounts: []corev1.VolumeMount{
{Name: "pvc1", MountPath: "/data1"},
{Name: "pvc2", MountPath: "/data2"},
{Name: "shared-memory", MountPath: "/dev/shm"},
},
},
{
name: "empty volumeMount name",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{Name: "", MountPoint: "/data"},
},
},
},
expectError: true,
},
{
name: "empty volumeMount mountPoint",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{Name: "test-pvc", MountPoint: ""},
},
},
},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
podSpec, err := GenerateBasePodSpec(
tt.component,
BackendFrameworkVLLM,
secretsRetriever,
"test-deployment",
"default",
RoleMain,
1,
controllerConfig,
commonconsts.MultinodeDeploymentTypeGrove,
"test-service",
)
if tt.expectError {
if err == nil {
t.Errorf("GenerateBasePodSpec() expected error, got nil")
}
return
}
if err != nil {
t.Errorf("GenerateBasePodSpec() unexpected error: %v", err)
return
}
// Check expected PVCs are present in volumes
for _, expectedPVC := range tt.expectedPVCs {
found := false
for _, volume := range podSpec.Volumes {
if volume.Name == expectedPVC && volume.PersistentVolumeClaim != nil {
if volume.PersistentVolumeClaim.ClaimName == expectedPVC {
found = true
break
}
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected PVC volume %s not found", expectedPVC)
}
}
// Check expected mounts are present
if len(podSpec.Containers) == 0 {
t.Errorf("GenerateBasePodSpec() no containers found")
return
}
container := podSpec.Containers[0]
for _, expectedMount := range tt.expectedMounts {
found := false
for _, mount := range container.VolumeMounts {
if mount.Name == expectedMount.Name && mount.MountPath == expectedMount.MountPath {
found = true
break
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected volume mount %+v not found", expectedMount)
}
}
})
}
}
func TestGenerateBasePodSpec_UseAsCompilationCache_BackendSupport(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
tests := []struct {
name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec
backendFramework BackendFramework
expectError bool
expectedMount *corev1.VolumeMount
}{
{
name: "useAsCompilationCache with custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
MountPoint: "/custom/cache",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkVLLM,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/cache"},
},
{
name: "useAsCompilationCache with default mount point for VLLM",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkVLLM,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: commonconsts.DefaultVLLMCacheMountPoint},
},
{
name: "useAsCompilationCache without mount point for SGLang - should error",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkSGLang,
expectError: true, // SGLang doesn't support compilation cache, requires explicit mount point
expectedMount: nil,
},
{
name: "useAsCompilationCache with explicit mount point for SGLang - should work",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
MountPoint: "/custom/sglang/cache",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkSGLang,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/sglang/cache"},
},
{
name: "useAsCompilationCache without mount point for TensorRT-LLM - should error",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkTRTLLM,
expectError: true, // TensorRT-LLM doesn't support compilation cache, requires explicit mount point
expectedMount: nil,
},
{
name: "useAsCompilationCache with explicit mount point for TensorRT-LLM - should work",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "cache-pvc",
MountPoint: "/custom/trtllm/cache",
UseAsCompilationCache: true,
},
},
},
},
backendFramework: BackendFrameworkTRTLLM,
expectError: false,
expectedMount: &corev1.VolumeMount{Name: "cache-pvc", MountPath: "/custom/trtllm/cache"},
},
{
name: "no useAsCompilationCache volumes - should be ignored",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
ComponentType: commonconsts.ComponentTypeFrontend,
VolumeMounts: []v1alpha1.VolumeMount{
{
Name: "regular-pvc",
MountPoint: "/data",
},
},
},
},
backendFramework: BackendFrameworkVLLM,
expectError: false,
expectedMount: nil, // Should be ignored, not error
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
podSpec, err := GenerateBasePodSpec(
tt.component,
tt.backendFramework,
secretsRetriever,
"test-deployment",
"default",
RoleMain,
1,
controllerConfig,
commonconsts.MultinodeDeploymentTypeGrove,
"test-service",
)
if tt.expectError {
if err == nil {
t.Errorf("GenerateBasePodSpec() expected error, got nil")
}
return
}
if err != nil {
t.Errorf("GenerateBasePodSpec() unexpected error: %v", err)
return
}
if tt.expectedMount != nil {
// Check PVC volume exists
found := false
for _, volume := range podSpec.Volumes {
if volume.Name == tt.expectedMount.Name && volume.PersistentVolumeClaim != nil {
if volume.PersistentVolumeClaim.ClaimName == tt.expectedMount.Name {
found = true
break
}
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected PVC volume %s not found", tt.expectedMount.Name)
}
// Check volume mount exists
if len(podSpec.Containers) == 0 {
t.Errorf("GenerateBasePodSpec() no containers found")
return
}
container := podSpec.Containers[0]
found = false
for _, mount := range container.VolumeMounts {
if mount.Name == tt.expectedMount.Name && mount.MountPath == tt.expectedMount.MountPath {
found = true
break
}
}
if !found {
t.Errorf("GenerateBasePodSpec() expected volume mount %+v not found", tt.expectedMount)
}
}
})
}
}
......@@ -30,6 +30,9 @@ kind: DynamoGraphDeployment
metadata:
name: llm-agg
spec:
pvcs:
- name: vllm-model-storage
size: 100Gi
services:
Frontend:
replicas: 1
......@@ -47,10 +50,9 @@ spec:
- name: SPECIFIC_ENV_VAR
value: some_specific_value
# Add PVC for model storage
pvc:
name: vllm-model-storage
mountPath: /models
size: 100Gi
volumeMounts:
- name: vllm-model-storage
mountPoint: /models
```
Commit and push this file to your Git repository. FluxCD will detect the new CR and create the initial Dynamo deployment in your cluster.
......
......@@ -151,9 +151,9 @@ helm upgrade --install dynamo-crds ./crds/ --namespace default
helm dep build ./platform/
helm install dynamo-platform ./platform/ \
--namespace ${NAMESPACE} \
--set dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator \
--set dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG} \
--set dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret
--set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \
--set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \
--set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret"
```
[Verify Installation](#verify-installation)
......
......@@ -178,6 +178,8 @@ kind: DynamoGraphDeployment
metadata:
name: model-caching
spec:
pvcs:
- name: s3-model
envs:
- name: HF_HOME
value: /model
......@@ -185,13 +187,13 @@ spec:
value: '{"Common": {"model": "/model", ...}}'
services:
VllmWorker:
pvc:
name: s3-model
mountPoint: /model
volumeMounts:
- name: s3-model
mountPoint: /model
Processor:
pvc:
name: s3-model
mountPoint: /model
volumeMounts:
- name: s3-model
mountPoint: /model
```
......@@ -286,15 +288,17 @@ spec:
16384, "enable-prefix-caching": true, "ServiceArgs": {"workers": 1, "resources":
{"gpu": "4", "memory": "40Gi"}}, "common-configs": ["model", "block-size", "max-model-len"]},
"Planner": {"environment": "kubernetes", "no-operation": true}}'
pvcs:
- name: llama-3-3-70b-instruct-model
services:
Processor:
pvc:
mountPoint: /model
name: llama-3-3-70b-instruct-model
volumeMounts:
- name: llama-3-3-70b-instruct-model
mountPoint: /model
VllmWorker:
pvc:
mountPoint: /model
name: llama-3-3-70b-instruct-model
volumeMounts:
- name: llama-3-3-70b-instruct-model
mountPoint: /model
extraPodSpec:
affinity:
nodeAffinity:
......
......@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
metadata:
name: sgl-dsr1-16gpu
spec:
pvcs:
- name: model-cache
create: false
services:
Frontend:
dynamoNamespace: sgl-dsr1-16gpu
......@@ -30,10 +33,9 @@ spec:
resources:
limits:
gpu: "8"
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......@@ -74,10 +76,9 @@ spec:
resources:
limits:
gpu: "8"
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......
......@@ -6,6 +6,9 @@ kind: DynamoGraphDeployment
metadata:
name: sgl-dsr1-8gpu
spec:
pvcs:
- name: model-cache
create: false
services:
Frontend:
dynamoNamespace: sgl-dsr1-8gpu
......@@ -28,10 +31,9 @@ spec:
resources:
limits:
gpu: "8"
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......@@ -69,10 +71,9 @@ spec:
resources:
limits:
gpu: "8"
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......
......@@ -6,15 +6,17 @@ metadata:
name: gpt-oss-agg-shm
spec:
backendFramework: trtllm
pvcs:
- name: model-cache-oss-gpt120b
create: false
services:
TrtllmWorker:
componentType: main
dynamoNamespace: gpt-oss-agg-shm
envFromSecret: hf-token-secret
pvc:
create: false
name: model-cache-oss-gpt120b
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache-oss-gpt120b
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......
......@@ -6,14 +6,16 @@ metadata:
name: llama3-70b-agg
spec:
backendFramework: vllm
pvcs:
- name: model-cache
create: false
services:
Frontend:
componentType: frontend
dynamoNamespace: llama3-70b-agg
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......@@ -23,10 +25,9 @@ spec:
componentType: worker
dynamoNamespace: llama3-70b-agg
envFromSecret: hf-token-secret
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 20Gi
extraPodSpec:
......
......@@ -6,14 +6,16 @@ metadata:
name: llama3-70b-disagg-mn
spec:
backendFramework: vllm
pvcs:
- name: model-cache
create: false
services:
Frontend:
componentType: frontend
dynamoNamespace: llama3-70b-disagg-mn
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......@@ -23,10 +25,9 @@ spec:
componentType: worker
dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......@@ -48,10 +49,9 @@ spec:
componentType: worker
dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......
......@@ -6,14 +6,16 @@ metadata:
name: llama3-70b-disagg-sn
spec:
backendFramework: vllm
pvcs:
- name: model-cache
create: false
services:
Frontend:
componentType: frontend
dynamoNamespace: llama3-70b-disagg-sn
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
......@@ -23,10 +25,9 @@ spec:
componentType: worker
dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......@@ -58,10 +59,9 @@ spec:
componentType: worker
dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret
pvc:
create: false
name: model-cache
mountPoint: /root/.cache/huggingface
volumeMounts:
- name: model-cache
mountPoint: /root/.cache/huggingface
sharedMemory:
size: 80Gi
extraPodSpec:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment