Unverified Commit 60975b51 authored by Julien Mancuso's avatar Julien Mancuso Committed by GitHub
Browse files

feat: improve CRD documentation (#3504)


Signed-off-by: default avatarJulien Mancuso <jmancuso@nvidia.com>
parent c3fcfdd6
...@@ -287,10 +287,10 @@ generate-api-docs: crd-ref-docs ## Generate API reference documentation from CRD ...@@ -287,10 +287,10 @@ generate-api-docs: crd-ref-docs ## Generate API reference documentation from CRD
--renderer=markdown \ --renderer=markdown \
--output-path=./docs/api_reference.md --output-path=./docs/api_reference.md
@echo "✅ Generated API reference at ./docs/api_reference.md" @echo "✅ Generated API reference at ./docs/api_reference.md"
# concatenate header.md and api_reference.md # concatenate header.md, api_reference.md, and footer.md
cat docs/header.md ./docs/api_reference.md > ../../../docs/kubernetes/api_reference.md cat docs/header.md ./docs/api_reference.md docs/footer.md > ../../../docs/kubernetes/api_reference.md
rm ./docs/api_reference.md rm ./docs/api_reference.md
@echo "✅ Concatenated header.md and api_reference.md" @echo "✅ Concatenated header.md, api_reference.md, and footer.md"
.PHONY: coverage .PHONY: coverage
coverage: test coverage: test
......
...@@ -20,8 +20,6 @@ ...@@ -20,8 +20,6 @@
package v1alpha1 package v1alpha1
import ( import (
"strings"
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common" dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
...@@ -38,12 +36,6 @@ const ( ...@@ -38,12 +36,6 @@ const (
// DynamoComponentDeploymentSpec defines the desired state of DynamoComponentDeployment // DynamoComponentDeploymentSpec defines the desired state of DynamoComponentDeployment
type DynamoComponentDeploymentSpec struct { type DynamoComponentDeploymentSpec struct {
// DynamoComponent selects the Dynamo component from the archive to deploy.
// Typically corresponds to a component defined in the packaged Dynamo artifacts.
DynamoComponent string `json:"dynamoComponent,omitempty"`
// contains the tag of the DynamoComponent: for example, "my_package:MyService"
DynamoTag string `json:"dynamoTag,omitempty"`
// BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm") // BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm")
// +kubebuilder:validation:Enum=sglang;vllm;trtllm // +kubebuilder:validation:Enum=sglang;vllm;trtllm
BackendFramework string `json:"backendFramework,omitempty"` BackendFramework string `json:"backendFramework,omitempty"`
...@@ -53,10 +45,6 @@ type DynamoComponentDeploymentSpec struct { ...@@ -53,10 +45,6 @@ type DynamoComponentDeploymentSpec struct {
DynamoComponentDeploymentSharedSpec `json:",inline"` DynamoComponentDeploymentSharedSpec `json:",inline"`
} }
type DynamoComponentDeploymentOverridesSpec struct {
DynamoComponentDeploymentSharedSpec `json:",inline"`
}
type DynamoComponentDeploymentSharedSpec struct { type DynamoComponentDeploymentSharedSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make" to regenerate code after modifying this file // Important: Run "make" to regenerate code after modifying this file
...@@ -229,7 +217,7 @@ func (s *DynamoComponentDeployment) SetSpec(spec any) { ...@@ -229,7 +217,7 @@ func (s *DynamoComponentDeployment) SetSpec(spec any) {
} }
func (s *DynamoComponentDeployment) IsFrontendComponent() bool { func (s *DynamoComponentDeployment) IsFrontendComponent() bool {
return strings.HasSuffix(s.Spec.DynamoTag, s.Spec.ServiceName) || s.Spec.ComponentType == commonconsts.ComponentTypeFrontend return s.Spec.ComponentType == commonconsts.ComponentTypeFrontend
} }
func (s *DynamoComponentDeployment) GetDynamoDeploymentConfig() []byte { func (s *DynamoComponentDeployment) GetDynamoDeploymentConfig() []byte {
......
...@@ -46,9 +46,8 @@ func TestDynamoComponentDeployment_IsFrontendComponent(t *testing.T) { ...@@ -46,9 +46,8 @@ func TestDynamoComponentDeployment_IsFrontendComponent(t *testing.T) {
name: "main component", name: "main component",
fields: fields{ fields: fields{
Spec: DynamoComponentDeploymentSpec{ Spec: DynamoComponentDeploymentSpec{
DynamoTag: "dynamo-component:main",
DynamoComponentDeploymentSharedSpec: DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: DynamoComponentDeploymentSharedSpec{
ServiceName: "main", ComponentType: commonconsts.ComponentTypeFrontend,
}, },
}, },
}, },
...@@ -58,9 +57,8 @@ func TestDynamoComponentDeployment_IsFrontendComponent(t *testing.T) { ...@@ -58,9 +57,8 @@ func TestDynamoComponentDeployment_IsFrontendComponent(t *testing.T) {
name: "not main component", name: "not main component",
fields: fields{ fields: fields{
Spec: DynamoComponentDeploymentSpec{ Spec: DynamoComponentDeploymentSpec{
DynamoTag: "dynamo-component:main",
DynamoComponentDeploymentSharedSpec: DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: DynamoComponentDeploymentSharedSpec{
ServiceName: "not-main", ComponentType: commonconsts.ComponentTypeWorker,
}, },
}, },
}, },
......
...@@ -29,20 +29,14 @@ import ( ...@@ -29,20 +29,14 @@ import (
// DynamoGraphDeploymentSpec defines the desired state of DynamoGraphDeployment. // DynamoGraphDeploymentSpec defines the desired state of DynamoGraphDeployment.
type DynamoGraphDeploymentSpec struct { type DynamoGraphDeploymentSpec struct {
// DynamoGraph selects the graph (workflow/topology) to deploy. This must match
// a graph name packaged with the Dynamo archive.
DynamoGraph string `json:"dynamoGraph,omitempty"`
// PVCs defines a list of persistent volume claims that can be referenced by components. // PVCs defines a list of persistent volume claims that can be referenced by components.
// Each PVC must have a unique name that can be referenced in component specifications. // Each PVC must have a unique name that can be referenced in component specifications.
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
PVCs []PVC `json:"pvcs,omitempty"` PVCs []PVC `json:"pvcs,omitempty"`
// Services allows per-service overrides of the component deployment settings. // Services are the services to deploy as part of this deployment.
// - key: name of the service defined by the DynamoComponent
// - value: overrides for that service
// If not set for a service, the default DynamoComponentDeployment values are used.
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
Services map[string]*DynamoComponentDeploymentOverridesSpec `json:"services,omitempty"` Services map[string]*DynamoComponentDeploymentSharedSpec `json:"services,omitempty"`
// Envs are environment variables applied to all services in the graph unless // Envs are environment variables applied to all services in the deployment unless
// overridden by service-specific configuration. // overridden by service-specific configuration.
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
Envs []corev1.EnvVar `json:"envs,omitempty"` Envs []corev1.EnvVar `json:"envs,omitempty"`
......
...@@ -173,22 +173,6 @@ func (in *DynamoComponentDeploymentList) DeepCopyObject() runtime.Object { ...@@ -173,22 +173,6 @@ func (in *DynamoComponentDeploymentList) DeepCopyObject() runtime.Object {
return nil return nil
} }
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoComponentDeploymentOverridesSpec) DeepCopyInto(out *DynamoComponentDeploymentOverridesSpec) {
*out = *in
in.DynamoComponentDeploymentSharedSpec.DeepCopyInto(&out.DynamoComponentDeploymentSharedSpec)
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoComponentDeploymentOverridesSpec.
func (in *DynamoComponentDeploymentOverridesSpec) DeepCopy() *DynamoComponentDeploymentOverridesSpec {
if in == nil {
return nil
}
out := new(DynamoComponentDeploymentOverridesSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponentDeploymentSharedSpec) { func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponentDeploymentSharedSpec) {
*out = *in *out = *in
...@@ -406,15 +390,15 @@ func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec ...@@ -406,15 +390,15 @@ func (in *DynamoGraphDeploymentSpec) DeepCopyInto(out *DynamoGraphDeploymentSpec
} }
if in.Services != nil { if in.Services != nil {
in, out := &in.Services, &out.Services in, out := &in.Services, &out.Services
*out = make(map[string]*DynamoComponentDeploymentOverridesSpec, len(*in)) *out = make(map[string]*DynamoComponentDeploymentSharedSpec, len(*in))
for key, val := range *in { for key, val := range *in {
var outVal *DynamoComponentDeploymentOverridesSpec var outVal *DynamoComponentDeploymentSharedSpec
if val == nil { if val == nil {
(*out)[key] = nil (*out)[key] = nil
} else { } else {
inVal := (*in)[key] inVal := (*in)[key]
in, out := &inVal, &outVal in, out := &inVal, &outVal
*out = new(DynamoComponentDeploymentOverridesSpec) *out = new(DynamoComponentDeploymentSharedSpec)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
(*out)[key] = outVal (*out)[key] = outVal
......
...@@ -32,7 +32,7 @@ processor: ...@@ -32,7 +32,7 @@ processor:
- "ParseError$" - "ParseError$"
# Ignore only the override wrapper type to reduce repetition # Ignore only the override wrapper type to reduce repetition
# Keep SharedSpec so embedded fields are documented once # Keep SharedSpec so embedded fields are documented once
- "DynamoComponentDeploymentOverridesSpec$" # - "DynamoComponentDeploymentOverridesSpec$"
- "DynamoComponentDeploymentStatus$" - "DynamoComponentDeploymentStatus$"
- "BaseStatus$" - "BaseStatus$"
......
# Operator Default Values Injection
The Dynamo operator automatically applies default values to various fields when they are not explicitly specified in your deployments. These defaults include:
- **Health Probes**: Startup, liveness, and readiness probes are configured differently for frontend, worker, and planner components. For example, worker components receive a startup probe with a 2-hour timeout (720 failures × 10 seconds) to accommodate long model loading times.
- **Shared Memory**: All components receive an 8Gi shared memory volume mounted at `/dev/shm` by default (can be disabled or resized via the `sharedMemory` field).
- **Environment Variables**: Components automatically receive environment variables like `DYN_NAMESPACE`, `DYN_PARENT_DGD_K8S_NAME`, `DYNAMO_PORT`, and backend-specific variables.
- **Pod Configuration**: Default `terminationGracePeriodSeconds` of 60 seconds and `restartPolicy: Always`.
- **Autoscaling**: When enabled without explicit metrics, defaults to CPU-based autoscaling with 80% target utilization.
- **Backend-Specific Behavior**: For multinode deployments, probes are automatically modified or removed for worker nodes depending on the backend framework (VLLM, SGLang, or TensorRT-LLM).
## Pod Specification Defaults
All components receive the following pod-level defaults unless overridden:
- **`terminationGracePeriodSeconds`**: `60` seconds
- **`restartPolicy`**: `Always`
## Shared Memory Configuration
Shared memory is enabled by default for all components:
- **Enabled**: `true` (unless explicitly disabled via `sharedMemory.disabled`)
- **Size**: `8Gi`
- **Mount Path**: `/dev/shm`
- **Volume Type**: `emptyDir` with `memory` medium
To disable shared memory or customize the size, use the `sharedMemory` field in your component specification.
## Health Probes by Component Type
The operator applies different default health probes based on the component type.
### Frontend Components
Frontend components receive the following probe configurations:
**Liveness Probe:**
- **Type**: HTTP GET
- **Path**: `/health`
- **Port**: `http` (8000)
- **Initial Delay**: 60 seconds
- **Period**: 60 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 10
**Readiness Probe:**
- **Type**: Exec command
- **Command**: `curl -s http://localhost:${DYNAMO_PORT}/health | jq -e ".status == \"healthy\""`
- **Initial Delay**: 60 seconds
- **Period**: 60 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 10
### Worker Components
Worker components receive the following probe configurations:
**Liveness Probe:**
- **Type**: HTTP GET
- **Path**: `/live`
- **Port**: `system` (9090)
- **Period**: 5 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 1
**Readiness Probe:**
- **Type**: HTTP GET
- **Path**: `/health`
- **Port**: `system` (9090)
- **Period**: 10 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 60
**Startup Probe:**
- **Type**: HTTP GET
- **Path**: `/live`
- **Port**: `system` (9090)
- **Period**: 10 seconds
- **Timeout**: 5 seconds
- **Failure Threshold**: 720 (allows up to 2 hours for startup: 10s × 720 = 7200s)
:::{note}
For larger models (typically >70B parameters) or slower storage systems, you may need to increase the `failureThreshold` to allow more time for model loading. Calculate the required threshold from your expected startup time, rounding up: `failureThreshold = ceil(expected_startup_seconds / periodSeconds)`. For example, a 3-hour startup with the default 10-second period needs `failureThreshold: 1080`. Override the startup probe in your component specification if the default 2-hour window is insufficient.
:::
### Multinode Deployment Probe Modifications
For multinode deployments, the operator modifies probes based on the backend framework and node role:
#### VLLM Backend
- **Worker nodes**: All probes (liveness, readiness, startup) are removed
#### SGLang Backend
- **Worker nodes**: All probes (liveness, readiness, startup) are removed
#### TensorRT-LLM Backend
- **Leader nodes**: All probes remain unchanged
- **Worker nodes**:
  - Liveness and startup probes are removed
  - Readiness probe is replaced with a TCP socket check on SSH port (2222):
    - **Initial Delay**: 20 seconds
    - **Period**: 20 seconds
    - **Timeout**: 5 seconds
    - **Failure Threshold**: 10
## Environment Variables
The operator automatically injects environment variables based on component type and configuration:
### All Components
- **`DYN_NAMESPACE`**: The Dynamo namespace for the component
- **`DYN_PARENT_DGD_K8S_NAME`**: The parent DynamoGraphDeployment Kubernetes resource name
- **`DYN_PARENT_DGD_K8S_NAMESPACE`**: The parent DynamoGraphDeployment Kubernetes namespace
### Frontend Components
- **`DYNAMO_PORT`**: `8000`
- **`DYN_HTTP_PORT`**: `8000`
### Worker Components
- **`DYN_SYSTEM_ENABLED`**: `true`
- **`DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS`**: `["generate"]`
- **`DYN_SYSTEM_PORT`**: `9090`
### Planner Components
- **`PLANNER_PROMETHEUS_PORT`**: `9085`
### VLLM Backend (with compilation cache)
When a volume mount is configured with `useAsCompilationCache: true`:
- **`VLLM_CACHE_ROOT`**: Set to the mount point of the cache volume
## Service Account
Planner components automatically receive the following service account:
- **`serviceAccountName`**: `planner-serviceaccount`
## Image Pull Secrets
The operator automatically discovers and injects image pull secrets for container images. When a component specifies a container image, the operator:
1. Scans all Kubernetes secrets of type `kubernetes.io/dockerconfigjson` in the component's namespace
2. Extracts the docker registry server URLs from each secret's authentication configuration
3. Matches the container image's registry host against the discovered registry URLs
4. Automatically injects matching secrets as `imagePullSecrets` in the pod specification
This eliminates the need to manually specify image pull secrets for each component. The operator maintains an internal index of docker secrets and their associated registries, refreshing this index periodically.
**To disable automatic image pull secret discovery** for a specific component, add the following annotation:
```yaml
annotations:
  nvidia.com/disable-image-pull-secret-discovery: "true"
```
## Autoscaling Defaults
When autoscaling is enabled but no metrics are specified, the operator applies:
- **Default Metric**: CPU utilization
- **Target Average Utilization**: `80%`
## Port Configurations
Default container ports are configured based on component type:
### Frontend Components
- **Port**: 8000
- **Protocol**: TCP
- **Name**: `http`
### Worker Components
- **Port**: 9090
- **Protocol**: TCP
- **Name**: `system`
### Planner Components
- **Port**: 9085
- **Protocol**: TCP
- **Name**: `metrics`
## Backend-Specific Configurations
### VLLM
- **Ray Head Port**: 6379 (for multinode deployments)
### SGLang
- **Distribution Init Port**: 29500 (for multinode deployments)
### TensorRT-LLM
- **SSH Port**: 2222 (for multinode MPI communication)
- **OpenMPI Environment**: `OMPI_MCA_orte_keep_fqdn_hostnames=1`
## Implementation Reference
For users who want to understand the implementation details or contribute to the operator, the default values described in this document are set in the following source files:
- **Health Probes & Pod Specifications**: [`internal/dynamo/graph.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/graph.go) - Contains the main logic for applying default probes, environment variables, shared memory, and pod configurations
- **Component-Specific Defaults**:
  - [`internal/dynamo/component_frontend.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/component_frontend.go)
  - [`internal/dynamo/component_worker.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/component_worker.go)
  - [`internal/dynamo/component_planner.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/component_planner.go)
- **Image Pull Secrets**: [`internal/secrets/docker.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/secrets/docker.go) - Implements the docker secret indexer and automatic discovery
- **Backend-Specific Behavior**:
  - [`internal/dynamo/backend_vllm.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/backend_vllm.go)
  - [`internal/dynamo/backend_sglang.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/backend_sglang.go)
  - [`internal/dynamo/backend_trtllm.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/backend_trtllm.go)
- **Constants & Annotations**: [`internal/consts/consts.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/consts/consts.go) - Defines annotation keys and other constants
## Notes
- All these defaults can be overridden by explicitly specifying values in your DynamoComponentDeployment or DynamoGraphDeployment resources
- User-specified probes (via `livenessProbe`, `readinessProbe`, or `startupProbe` fields) take precedence over operator defaults
- For multinode deployments, some defaults are modified or removed as described above to accommodate distributed execution patterns
- The `extraPodSpec.mainContainer` field can be used to override probe configurations set by the operator
\ No newline at end of file
...@@ -688,8 +688,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -688,8 +688,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
}, },
}, },
Spec: v1alpha1.DynamoComponentDeploymentSpec{ Spec: v1alpha1.DynamoComponentDeploymentSpec{
DynamoComponent: "test-lws-component",
DynamoTag: "test-tag",
BackendFramework: string(dynamo.BackendFrameworkVLLM), BackendFramework: string(dynamo.BackendFrameworkVLLM),
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
Envs: []corev1.EnvVar{ Envs: []corev1.EnvVar{
...@@ -990,7 +988,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -990,7 +988,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
dynamoComponentDeployment: &v1alpha1.DynamoComponentDeployment{ dynamoComponentDeployment: &v1alpha1.DynamoComponentDeployment{
ObjectMeta: metav1.ObjectMeta{Name: "test-lws-nil-id", Namespace: "default"}, ObjectMeta: metav1.ObjectMeta{Name: "test-lws-nil-id", Namespace: "default"},
Spec: v1alpha1.DynamoComponentDeploymentSpec{ Spec: v1alpha1.DynamoComponentDeploymentSpec{
DynamoComponent: "test-comp", DynamoTag: "test",
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
Multinode: &v1alpha1.MultinodeSpec{ Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2, NodeCount: 2,
...@@ -1034,7 +1031,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing. ...@@ -1034,7 +1031,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
dynamoComponentDeployment: &v1alpha1.DynamoComponentDeployment{ dynamoComponentDeployment: &v1alpha1.DynamoComponentDeployment{
ObjectMeta: metav1.ObjectMeta{Name: "test-lws-leader-err", Namespace: "default"}, ObjectMeta: metav1.ObjectMeta{Name: "test-lws-leader-err", Namespace: "default"},
Spec: v1alpha1.DynamoComponentDeploymentSpec{ Spec: v1alpha1.DynamoComponentDeploymentSpec{
DynamoComponent: "test-comp", DynamoTag: "test",
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
Multinode: &v1alpha1.MultinodeSpec{ Multinode: &v1alpha1.MultinodeSpec{
NodeCount: 2, NodeCount: 2,
......
...@@ -27,7 +27,7 @@ func isPythonCommand(cmd string) bool { ...@@ -27,7 +27,7 @@ func isPythonCommand(cmd string) bool {
return matched return matched
} }
func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) { func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
// Check for volumeMounts with useAsCompilationCache=true // Check for volumeMounts with useAsCompilationCache=true
for _, volumeMount := range component.VolumeMounts { for _, volumeMount := range component.VolumeMounts {
if volumeMount.UseAsCompilationCache { if volumeMount.UseAsCompilationCache {
...@@ -112,7 +112,7 @@ func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNod ...@@ -112,7 +112,7 @@ func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNod
} }
} }
func (b *SGLangBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) { func (b *SGLangBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string) {
// do nothing // do nothing
} }
......
...@@ -192,7 +192,7 @@ func TestSGLangBackend_PythonCommandInjection(t *testing.T) { ...@@ -192,7 +192,7 @@ func TestSGLangBackend_PythonCommandInjection(t *testing.T) {
Args: append([]string{}, tt.initialArgs...), Args: append([]string{}, tt.initialArgs...),
} }
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, "test-service", tt.multinodeDeployer) backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentSharedSpec{}, "test-service", tt.multinodeDeployer)
if !reflect.DeepEqual(container.Command, tt.expectedCommand) { if !reflect.DeepEqual(container.Command, tt.expectedCommand) {
t.Errorf("UpdateContainer() command = %v, want %v", container.Command, tt.expectedCommand) t.Errorf("UpdateContainer() command = %v, want %v", container.Command, tt.expectedCommand)
...@@ -317,7 +317,7 @@ func TestSGLangBackend_ShellCommandInjection(t *testing.T) { ...@@ -317,7 +317,7 @@ func TestSGLangBackend_ShellCommandInjection(t *testing.T) {
Args: append([]string{}, tt.initialArgs...), Args: append([]string{}, tt.initialArgs...),
} }
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, "test-service", tt.multinodeDeployer) backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentSharedSpec{}, "test-service", tt.multinodeDeployer)
if !reflect.DeepEqual(container.Args, tt.expectedArgs) { if !reflect.DeepEqual(container.Args, tt.expectedArgs) {
t.Errorf("UpdateContainer() args = %v, want %v", container.Args, tt.expectedArgs) t.Errorf("UpdateContainer() args = %v, want %v", container.Args, tt.expectedArgs)
...@@ -491,7 +491,7 @@ func TestSGLangBackend_ProbeRemoval(t *testing.T) { ...@@ -491,7 +491,7 @@ func TestSGLangBackend_ProbeRemoval(t *testing.T) {
StartupProbe: startupProbe, StartupProbe: startupProbe,
} }
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, "test-service", tt.multinodeDeployer) backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentSharedSpec{}, "test-service", tt.multinodeDeployer)
if tt.expectProbesRemoved { if tt.expectProbesRemoved {
if container.LivenessProbe != nil { if container.LivenessProbe != nil {
...@@ -523,21 +523,19 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -523,21 +523,19 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
volumeMounts []corev1.VolumeMount volumeMounts []corev1.VolumeMount
expectNoEnvVarChanges bool expectNoEnvVarChanges bool
expectLoggedPartialSupport bool expectLoggedPartialSupport bool
}{ }{
{ {
name: "SGLang backend with useAsCompilationCache volume mount", name: "SGLang backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "sglang-cache",
Name: "sglang-cache", MountPoint: "/cache/sglang",
MountPoint: "/cache/sglang", UseAsCompilationCache: true,
UseAsCompilationCache: true,
},
}, },
}, },
}, },
...@@ -547,14 +545,12 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -547,14 +545,12 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "SGLang backend with useAsCompilationCache at custom volume mount", name: "SGLang backend with useAsCompilationCache at custom volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "custom-cache",
Name: "custom-cache", MountPoint: "/custom/cache/path",
MountPoint: "/custom/cache/path", UseAsCompilationCache: true,
UseAsCompilationCache: true,
},
}, },
}, },
}, },
...@@ -564,13 +560,11 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -564,13 +560,11 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "SGLang backend without useAsCompilationCache volume mount", name: "SGLang backend without useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "regular-volume",
Name: "regular-volume", MountPoint: "/data",
MountPoint: "/data",
},
}, },
}, },
}, },
...@@ -580,10 +574,8 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -580,10 +574,8 @@ func TestSGLangBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "SGLang backend with no volume mounts", name: "SGLang backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: nil,
VolumeMounts: nil,
},
}, },
volumeMounts: []corev1.VolumeMount{}, volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true, expectNoEnvVarChanges: true,
......
...@@ -18,7 +18,7 @@ type TRTLLMBackend struct { ...@@ -18,7 +18,7 @@ type TRTLLMBackend struct {
MpiRunSecretName string MpiRunSecretName string
} }
func (b *TRTLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) { func (b *TRTLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
// Check for volumeMounts with useAsCompilationCache=true // Check for volumeMounts with useAsCompilationCache=true
for _, volumeMount := range component.VolumeMounts { for _, volumeMount := range component.VolumeMounts {
if volumeMount.UseAsCompilationCache { if volumeMount.UseAsCompilationCache {
...@@ -75,7 +75,7 @@ func (b *TRTLLMBackend) UpdateContainer(container *corev1.Container, numberOfNod ...@@ -75,7 +75,7 @@ func (b *TRTLLMBackend) UpdateContainer(container *corev1.Container, numberOfNod
} }
} }
func (b *TRTLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) { func (b *TRTLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string) {
// Add SSH keypair volume for TRTLLM multinode deployments // Add SSH keypair volume for TRTLLM multinode deployments
if numberOfNodes > 1 { if numberOfNodes > 1 {
sshVolume := corev1.Volume{ sshVolume := corev1.Volume{
...@@ -102,7 +102,7 @@ func (b *TRTLLMBackend) addSSHVolumeMount(container *corev1.Container) { ...@@ -102,7 +102,7 @@ func (b *TRTLLMBackend) addSSHVolumeMount(container *corev1.Container) {
} }
// setupLeaderContainer configures the leader node with SSH setup and mpirun command // setupLeaderContainer configures the leader node with SSH setup and mpirun command
func (b *TRTLLMBackend) setupLeaderContainer(container *corev1.Container, numberOfNodes int32, serviceName string, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, multinodeDeployer MultinodeDeployer) { func (b *TRTLLMBackend) setupLeaderContainer(container *corev1.Container, numberOfNodes int32, serviceName string, component *v1alpha1.DynamoComponentDeploymentSharedSpec, multinodeDeployer MultinodeDeployer) {
// Generate the list of worker hostnames // Generate the list of worker hostnames
workerHosts := b.generateWorkerHostnames(numberOfNodes, serviceName, multinodeDeployer) workerHosts := b.generateWorkerHostnames(numberOfNodes, serviceName, multinodeDeployer)
......
...@@ -21,7 +21,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -21,7 +21,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
numberOfNodes int32 numberOfNodes int32
role Role role Role
multinodeDeployer MultinodeDeployer multinodeDeployer MultinodeDeployer
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
expectedVolumeMounts []corev1.VolumeMount expectedVolumeMounts []corev1.VolumeMount
expectedCommand []string expectedCommand []string
expectedArgs []string expectedArgs []string
...@@ -36,7 +36,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -36,7 +36,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
numberOfNodes: 1, numberOfNodes: 1,
role: RoleMain, role: RoleMain,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
expectedVolumeMounts: []corev1.VolumeMount{}, expectedVolumeMounts: []corev1.VolumeMount{},
expectedCommand: []string{}, expectedCommand: []string{},
expectedArgs: []string{"python3", "--model", "test"}, expectedArgs: []string{"python3", "--model", "test"},
...@@ -51,12 +51,10 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -51,12 +51,10 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
numberOfNodes: 3, numberOfNodes: 3,
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Requests: &common.ResourceItem{
Requests: &common.ResourceItem{ GPU: "2",
GPU: "2",
},
}, },
}, },
}, },
...@@ -78,7 +76,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -78,7 +76,7 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
numberOfNodes: 3, numberOfNodes: 3,
role: RoleWorker, role: RoleWorker,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
expectedVolumeMounts: []corev1.VolumeMount{ expectedVolumeMounts: []corev1.VolumeMount{
{Name: mpiRunSecretName, MountPath: "/ssh-pk", ReadOnly: true}, {Name: mpiRunSecretName, MountPath: "/ssh-pk", ReadOnly: true},
}, },
...@@ -107,12 +105,10 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) { ...@@ -107,12 +105,10 @@ func TestTRTLLMBackend_UpdateContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
role: RoleLeader, role: RoleLeader,
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Limits: &common.ResourceItem{
Limits: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -350,7 +346,7 @@ func TestTRTLLMBackend_UpdatePodSpec(t *testing.T) { ...@@ -350,7 +346,7 @@ func TestTRTLLMBackend_UpdatePodSpec(t *testing.T) {
}, },
}, },
} }
component := &v1alpha1.DynamoComponentDeploymentOverridesSpec{} component := &v1alpha1.DynamoComponentDeploymentSharedSpec{}
// Call UpdatePodSpec // Call UpdatePodSpec
backend.UpdatePodSpec(podSpec, tt.numberOfNodes, tt.role, component, "test-service") backend.UpdatePodSpec(podSpec, tt.numberOfNodes, tt.role, component, "test-service")
...@@ -550,7 +546,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -550,7 +546,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes int32 numberOfNodes int32
multinodeDeployer MultinodeDeployer multinodeDeployer MultinodeDeployer
serviceName string serviceName string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
initialArgs []string initialArgs []string
initialCommand []string initialCommand []string
expected string expected string
...@@ -560,12 +556,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -560,12 +556,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 3, numberOfNodes: 3,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test-service", serviceName: "test-service",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Requests: &common.ResourceItem{
Requests: &common.ResourceItem{ GPU: "2",
GPU: "2",
},
}, },
}, },
}, },
...@@ -578,7 +572,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -578,7 +572,7 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
serviceName: "worker", serviceName: "worker",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
initialArgs: []string{}, initialArgs: []string{},
initialCommand: []string{"python", "-m", "worker"}, initialCommand: []string{"python", "-m", "worker"},
expected: "mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *\\nIdentityFile ~/.ssh/id_rsa\\nStrictHostKeyChecking no\\nPort 2222\\n' > ~/.ssh/config && mpirun --oversubscribe -n 0 -H $(LWS_LEADER_ADDRESS),$(LWS_WORKER_1_ADDRESS) --mca pml ob1 --mca plm_rsh_args \"-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa\" -x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_ENDPOINT -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python -m worker'", expected: "mkdir -p ~/.ssh && ls -la /ssh-pk/ && cp /ssh-pk/private.key ~/.ssh/id_rsa && cp /ssh-pk/private.key.pub ~/.ssh/id_rsa.pub && cp /ssh-pk/private.key.pub ~/.ssh/authorized_keys && chmod 600 ~/.ssh/id_rsa ~/.ssh/authorized_keys && chmod 644 ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys && printf 'Host *\\nIdentityFile ~/.ssh/id_rsa\\nStrictHostKeyChecking no\\nPort 2222\\n' > ~/.ssh/config && mpirun --oversubscribe -n 0 -H $(LWS_LEADER_ADDRESS),$(LWS_WORKER_1_ADDRESS) --mca pml ob1 --mca plm_rsh_args \"-p 2222 -o StrictHostKeyChecking=no -i ~/.ssh/id_rsa\" -x CUDA_VISIBLE_DEVICES -x HF_DATASETS_CACHE -x HF_ENDPOINT -x HF_HOME -x HF_TOKEN -x HOME -x HUGGING_FACE_HUB_TOKEN -x LD_LIBRARY_PATH -x MODEL_PATH -x NCCL_DEBUG -x NCCL_IB_DISABLE -x NCCL_P2P_DISABLE -x PATH -x PYTHONPATH -x TENSORRT_LLM_CACHE_DIR -x TOKENIZERS_PARALLELISM -x TRANSFORMERS_CACHE -x USER bash -c 'source /opt/dynamo/venv/bin/activate && trtllm-llmapi-launch python -m worker'",
...@@ -588,12 +582,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -588,12 +582,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Limits: &common.ResourceItem{
Limits: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -606,12 +598,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -606,12 +598,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Limits: &common.ResourceItem{
Limits: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -624,12 +614,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -624,12 +614,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Limits: &common.ResourceItem{
Limits: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -642,12 +630,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -642,12 +630,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Limits: &common.ResourceItem{
Limits: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -660,12 +646,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -660,12 +646,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Requests: &common.ResourceItem{
Requests: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -678,12 +662,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) { ...@@ -678,12 +662,10 @@ func TestTRTLLMBackend_setupLeaderContainer(t *testing.T) {
numberOfNodes: 2, numberOfNodes: 2,
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
serviceName: "test", serviceName: "test",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ Resources: &common.Resources{
Resources: &common.Resources{ Requests: &common.ResourceItem{
Requests: &common.ResourceItem{ GPU: "1",
GPU: "1",
},
}, },
}, },
}, },
...@@ -874,21 +856,19 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -874,21 +856,19 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
volumeMounts []corev1.VolumeMount volumeMounts []corev1.VolumeMount
expectNoEnvVarChanges bool expectNoEnvVarChanges bool
expectLoggedPartialSupport bool expectLoggedPartialSupport bool
}{ }{
{ {
name: "TensorRT-LLM backend with useAsCompilationCache volume mount", name: "TensorRT-LLM backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "trtllm-cache",
Name: "trtllm-cache", MountPoint: "/cache/trtllm",
MountPoint: "/cache/trtllm", UseAsCompilationCache: true,
UseAsCompilationCache: true,
},
}, },
}, },
}, },
...@@ -898,14 +878,12 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -898,14 +878,12 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "TensorRT-LLM backend with useAsCompilationCache at custom mount point", name: "TensorRT-LLM backend with useAsCompilationCache at custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "custom-cache",
Name: "custom-cache", MountPoint: "/custom/cache/path",
MountPoint: "/custom/cache/path", UseAsCompilationCache: true,
UseAsCompilationCache: true,
},
}, },
}, },
}, },
...@@ -915,13 +893,11 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -915,13 +893,11 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "TensorRT-LLM backend without useAsCompilationCache", name: "TensorRT-LLM backend without useAsCompilationCache",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "regular-volume",
Name: "regular-volume", MountPoint: "/data",
MountPoint: "/data",
},
}, },
}, },
}, },
...@@ -931,10 +907,8 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -931,10 +907,8 @@ func TestTRTLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "TensorRT-LLM backend with no volume mounts", name: "TensorRT-LLM backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: nil,
VolumeMounts: nil,
},
}, },
volumeMounts: []corev1.VolumeMount{}, volumeMounts: []corev1.VolumeMount{},
expectNoEnvVarChanges: true, expectNoEnvVarChanges: true,
......
...@@ -15,7 +15,7 @@ const ( ...@@ -15,7 +15,7 @@ const (
// VLLMBackend carries the vLLM-specific container and pod-spec handling.
// It holds no state; all behavior lives in its UpdateContainer and
// UpdatePodSpec methods.
type VLLMBackend struct{}
func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) { func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
isMultinode := numberOfNodes > 1 isMultinode := numberOfNodes > 1
if isMultinode { if isMultinode {
...@@ -60,7 +60,7 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes ...@@ -60,7 +60,7 @@ func (b *VLLMBackend) UpdateContainer(container *corev1.Container, numberOfNodes
} }
} }
func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) { func (b *VLLMBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string) {
// do nothing // do nothing
} }
......
...@@ -16,7 +16,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -16,7 +16,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name string name string
numberOfNodes int32 numberOfNodes int32
role Role role Role
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
multinodeDeployer MultinodeDeployer multinodeDeployer MultinodeDeployer
initialArgs []string initialArgs []string
initialLivenessProbe *corev1.Probe initialLivenessProbe *corev1.Probe
...@@ -31,7 +31,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -31,7 +31,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name: "single node does not modify args", name: "single node does not modify args",
numberOfNodes: 1, numberOfNodes: 1,
role: RoleMain, role: RoleMain,
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialArgs: []string{"python3", "-m", "dynamo.vllm"}, initialArgs: []string{"python3", "-m", "dynamo.vllm"},
expectNotModified: true, expectNotModified: true,
...@@ -40,7 +40,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -40,7 +40,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name: "multinode leader prepends ray start --head", name: "multinode leader prepends ray start --head",
numberOfNodes: 3, numberOfNodes: 3,
role: RoleLeader, role: RoleLeader,
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialArgs: []string{"python3", "-m", "dynamo.vllm", "--model", "test"}, initialArgs: []string{"python3", "-m", "dynamo.vllm", "--model", "test"},
expectContains: []string{"ray start --head --port=6379 &&", "python3", "-m", "dynamo.vllm", "--model", "test"}, expectContains: []string{"ray start --head --port=6379 &&", "python3", "-m", "dynamo.vllm", "--model", "test"},
...@@ -50,7 +50,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -50,7 +50,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name: "multinode worker replaces args with ray start --block", name: "multinode worker replaces args with ray start --block",
numberOfNodes: 3, numberOfNodes: 3,
role: RoleWorker, role: RoleWorker,
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialArgs: []string{"python3", "-m", "dynamo.vllm", "--model", "test"}, initialArgs: []string{"python3", "-m", "dynamo.vllm", "--model", "test"},
expectedArgs: []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"}, expectedArgs: []string{"ray start --address=$(GROVE_PCSG_NAME)-$(GROVE_PCSG_INDEX)-test-service-ldr-0.$(GROVE_HEADLESS_SERVICE):6379 --block"},
...@@ -60,7 +60,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -60,7 +60,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name: "multinode worker with LWS deployment type", name: "multinode worker with LWS deployment type",
numberOfNodes: 2, numberOfNodes: 2,
role: RoleWorker, role: RoleWorker,
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &LWSMultinodeDeployer{}, multinodeDeployer: &LWSMultinodeDeployer{},
initialArgs: []string{"python3", "-m", "dynamo.vllm"}, initialArgs: []string{"python3", "-m", "dynamo.vllm"},
expectedArgs: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"}, expectedArgs: []string{"ray start --address=$(LWS_LEADER_ADDRESS):6379 --block"},
...@@ -70,7 +70,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -70,7 +70,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name: "multinode leader with no initial args", name: "multinode leader with no initial args",
numberOfNodes: 2, numberOfNodes: 2,
role: RoleLeader, role: RoleLeader,
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialArgs: []string{}, initialArgs: []string{},
expectNotModified: true, // Should not modify empty args expectNotModified: true, // Should not modify empty args
...@@ -79,7 +79,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) { ...@@ -79,7 +79,7 @@ func TestVLLMBackend_UpdateContainer(t *testing.T) {
name: "multinode main role (non-leader/worker) does not modify args", name: "multinode main role (non-leader/worker) does not modify args",
numberOfNodes: 3, numberOfNodes: 3,
role: RoleMain, role: RoleMain,
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, component: &v1alpha1.DynamoComponentDeploymentSharedSpec{},
multinodeDeployer: &GroveMultinodeDeployer{}, multinodeDeployer: &GroveMultinodeDeployer{},
initialArgs: []string{"python3", "-m", "dynamo.frontend"}, initialArgs: []string{"python3", "-m", "dynamo.frontend"},
expectNotModified: true, expectNotModified: true,
...@@ -131,7 +131,7 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -131,7 +131,7 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
component *v1alpha1.DynamoComponentDeploymentOverridesSpec component *v1alpha1.DynamoComponentDeploymentSharedSpec
volumeMounts []corev1.VolumeMount volumeMounts []corev1.VolumeMount
expectCacheEnvVar bool expectCacheEnvVar bool
expectCacheEnvVarName string expectCacheEnvVarName string
...@@ -139,14 +139,12 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -139,14 +139,12 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}{ }{
{ {
name: "VLLM backend with useAsCompilationCache volume mount", name: "VLLM backend with useAsCompilationCache volume mount",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "vllm-cache",
Name: "vllm-cache", MountPoint: "/root/.cache/vllm",
MountPoint: "/root/.cache/vllm", UseAsCompilationCache: true,
UseAsCompilationCache: true,
},
}, },
}, },
}, },
...@@ -157,14 +155,12 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -157,14 +155,12 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "VLLM backend with useAsCompilationCache at custom mount point", name: "VLLM backend with useAsCompilationCache at custom mount point",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "custom-cache",
Name: "custom-cache", MountPoint: "/custom/cache/path",
MountPoint: "/custom/cache/path", UseAsCompilationCache: true,
UseAsCompilationCache: true,
},
}, },
}, },
}, },
...@@ -175,13 +171,11 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -175,13 +171,11 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "VLLM backend without useAsCompilationCache", name: "VLLM backend without useAsCompilationCache",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: []v1alpha1.VolumeMount{
VolumeMounts: []v1alpha1.VolumeMount{ {
{ Name: "regular-volume",
Name: "regular-volume", MountPoint: "/data",
MountPoint: "/data",
},
}, },
}, },
}, },
...@@ -190,10 +184,8 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) { ...@@ -190,10 +184,8 @@ func TestVLLMBackend_UpdateContainer_UseAsCompilationCache(t *testing.T) {
}, },
{ {
name: "VLLM backend with no volume mounts", name: "VLLM backend with no volume mounts",
component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{ component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{ VolumeMounts: nil,
VolumeMounts: nil,
},
}, },
volumeMounts: []corev1.VolumeMount{}, volumeMounts: []corev1.VolumeMount{},
expectCacheEnvVar: false, expectCacheEnvVar: false,
......
...@@ -125,7 +125,7 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD ...@@ -125,7 +125,7 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
} }
for componentName, component := range parentDynamoGraphDeployment.Spec.Services { for componentName, component := range parentDynamoGraphDeployment.Spec.Services {
deployment := &v1alpha1.DynamoComponentDeployment{} deployment := &v1alpha1.DynamoComponentDeployment{}
deployment.Spec.DynamoComponentDeploymentSharedSpec = component.DynamoComponentDeploymentSharedSpec deployment.Spec.DynamoComponentDeploymentSharedSpec = *component
deployment.Name = GetDynamoComponentName(parentDynamoGraphDeployment, componentName) deployment.Name = GetDynamoComponentName(parentDynamoGraphDeployment, componentName)
deployment.Spec.BackendFramework = parentDynamoGraphDeployment.Spec.BackendFramework deployment.Spec.BackendFramework = parentDynamoGraphDeployment.Spec.BackendFramework
deployment.Namespace = parentDynamoGraphDeployment.Namespace deployment.Namespace = parentDynamoGraphDeployment.Namespace
...@@ -597,18 +597,18 @@ const ( ...@@ -597,18 +597,18 @@ const (
// Backend interface for modular backend logic // Backend interface for modular backend logic
// Each backend (SGLang, VLLM, etc.) implements this interface // Each backend (SGLang, VLLM, etc.) implements this interface
type Backend interface { type Backend interface {
UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string, multinodeDeployer MultinodeDeployer)
UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string)
} }
// NoopBackend does no processing - used for non-worker components like frontend, planner, router.
// It is stateless; both of its methods have empty bodies.
type NoopBackend struct{}
func (b *NoopBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) { func (b *NoopBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
// No-op: frontend, planner, router, etc. don't need backend-specific processing // No-op: frontend, planner, router, etc. don't need backend-specific processing
} }
func (b *NoopBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) { func (b *NoopBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentSharedSpec, serviceName string) {
// No-op: frontend, planner, router, etc. don't need backend-specific processing // No-op: frontend, planner, router, etc. don't need backend-specific processing
} }
...@@ -691,7 +691,7 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller ...@@ -691,7 +691,7 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller
// //
//nolint:gocyclo //nolint:gocyclo
func GenerateBasePodSpec( func GenerateBasePodSpec(
component *v1alpha1.DynamoComponentDeploymentOverridesSpec, component *v1alpha1.DynamoComponentDeploymentSharedSpec,
backendFramework BackendFramework, backendFramework BackendFramework,
secretsRetriever SecretsRetriever, secretsRetriever SecretsRetriever,
parentGraphDeploymentName string, parentGraphDeploymentName string,
...@@ -861,7 +861,7 @@ func setMetricsLabels(labels map[string]string, dynamoGraphDeployment *v1alpha1. ...@@ -861,7 +861,7 @@ func setMetricsLabels(labels map[string]string, dynamoGraphDeployment *v1alpha1.
labels[commonconsts.KubeLabelMetricsEnabled] = commonconsts.KubeLabelValueTrue labels[commonconsts.KubeLabelMetricsEnabled] = commonconsts.KubeLabelValueTrue
} }
func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, parentGraphDeploymentName string, namespace string, numberOfNodes int32) ComponentContext { func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentSharedSpec, parentGraphDeploymentName string, namespace string, numberOfNodes int32) ComponentContext {
componentContext := ComponentContext{ componentContext := ComponentContext{
numberOfNodes: numberOfNodes, numberOfNodes: numberOfNodes,
ParentGraphDeploymentName: parentGraphDeploymentName, ParentGraphDeploymentName: parentGraphDeploymentName,
...@@ -875,7 +875,7 @@ func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentOverr ...@@ -875,7 +875,7 @@ func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentOverr
// GeneratePodSpecForComponent creates a PodSpec for Grove deployments (simplified wrapper) // GeneratePodSpecForComponent creates a PodSpec for Grove deployments (simplified wrapper)
func GeneratePodSpecForComponent( func GeneratePodSpecForComponent(
component *v1alpha1.DynamoComponentDeploymentOverridesSpec, component *v1alpha1.DynamoComponentDeploymentSharedSpec,
backendFramework BackendFramework, backendFramework BackendFramework,
secretsRetriever SecretsRetriever, secretsRetriever SecretsRetriever,
dynamoDeployment *v1alpha1.DynamoGraphDeployment, dynamoDeployment *v1alpha1.DynamoGraphDeployment,
...@@ -1004,7 +1004,7 @@ func GenerateGrovePodCliqueSet( ...@@ -1004,7 +1004,7 @@ func GenerateGrovePodCliqueSet(
return controller_common.CanonicalizePodCliqueSet(gangSet), nil return controller_common.CanonicalizePodCliqueSet(gangSet), nil
} }
func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, dynamoDeployment *v1alpha1.DynamoGraphDeployment, componentName string) (map[string]string, error) { func generateLabels(component *v1alpha1.DynamoComponentDeploymentSharedSpec, dynamoDeployment *v1alpha1.DynamoGraphDeployment, componentName string) (map[string]string, error) {
labels := make(map[string]string) labels := make(map[string]string)
labels[commonconsts.KubeLabelDynamoSelector] = GetDynamoComponentName(dynamoDeployment, componentName) labels[commonconsts.KubeLabelDynamoSelector] = GetDynamoComponentName(dynamoDeployment, componentName)
labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = dynamoDeployment.Name labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = dynamoDeployment.Name
...@@ -1033,7 +1033,7 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, ...@@ -1033,7 +1033,7 @@ func generateLabels(component *v1alpha1.DynamoComponentDeploymentOverridesSpec,
return labels, nil return labels, nil
} }
func generateAnnotations(component *v1alpha1.DynamoComponentDeploymentOverridesSpec) (map[string]string, error) { func generateAnnotations(component *v1alpha1.DynamoComponentDeploymentSharedSpec) (map[string]string, error) {
annotations := make(map[string]string) annotations := make(map[string]string)
if component.Annotations != nil { if component.Annotations != nil {
err := mergo.Merge(&annotations, component.Annotations, mergo.WithOverride) err := mergo.Merge(&annotations, component.Annotations, mergo.WithOverride)
...@@ -1147,7 +1147,7 @@ func determineBackendFramework( ...@@ -1147,7 +1147,7 @@ func determineBackendFramework(
// 3. Return error if worker has neither detection nor explicit config // 3. Return error if worker has neither detection nor explicit config
// Also validates consistency between detected and explicit if both exist // Also validates consistency between detected and explicit if both exist
func getBackendFrameworkFromComponent( func getBackendFrameworkFromComponent(
component *v1alpha1.DynamoComponentDeploymentOverridesSpec, component *v1alpha1.DynamoComponentDeploymentSharedSpec,
dynamoDeployment *v1alpha1.DynamoGraphDeployment, dynamoDeployment *v1alpha1.DynamoGraphDeployment,
) (BackendFramework, error) { ) (BackendFramework, error) {
// Extract command/args from component // Extract command/args from component
...@@ -1170,10 +1170,8 @@ func getBackendFrameworkFromComponent( ...@@ -1170,10 +1170,8 @@ func getBackendFrameworkFromComponent(
// ConvertDynamoComponentDeploymentToSpec converts a DynamoComponentDeployment to our component spec interface // ConvertDynamoComponentDeploymentToSpec converts a DynamoComponentDeployment to our component spec interface
// This is a helper for the controller to use our backend logic // This is a helper for the controller to use our backend logic
func ConvertDynamoComponentDeploymentToSpec(dynComponent *v1alpha1.DynamoComponentDeployment) *v1alpha1.DynamoComponentDeploymentOverridesSpec { func ConvertDynamoComponentDeploymentToSpec(dynComponent *v1alpha1.DynamoComponentDeployment) *v1alpha1.DynamoComponentDeploymentSharedSpec {
return &v1alpha1.DynamoComponentDeploymentOverridesSpec{ return dynComponent.Spec.DynamoComponentDeploymentSharedSpec.DeepCopy()
DynamoComponentDeploymentSharedSpec: *dynComponent.Spec.DynamoComponentDeploymentSharedSpec.DeepCopy(),
}
} }
// GetBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment // GetBackendFrameworkFromDynamoComponent determines backend framework for a DynamoComponentDeployment
......
...@@ -85,6 +85,7 @@ DynamoComponentDeployment is the Schema for the dynamocomponentdeployments API ...@@ -85,6 +85,7 @@ DynamoComponentDeployment is the Schema for the dynamocomponentdeployments API
_Appears in:_ _Appears in:_
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec) - [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
- [DynamoGraphDeploymentSpec](#dynamographdeploymentspec)
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
...@@ -92,12 +93,13 @@ _Appears in:_ ...@@ -92,12 +93,13 @@ _Appears in:_
| `labels` _object (keys:string, values:string)_ | Labels to add to generated Kubernetes resources for this component. | | | | `labels` _object (keys:string, values:string)_ | Labels to add to generated Kubernetes resources for this component. | | |
| `serviceName` _string_ | The name of the component | | | | `serviceName` _string_ | The name of the component | | |
| `componentType` _string_ | ComponentType indicates the role of this component (for example, "main"). | | | | `componentType` _string_ | ComponentType indicates the role of this component (for example, "main"). | | |
| `subComponentType` _string_ | SubComponentType indicates the sub-role of this component (for example, "prefill"). | | |
| `dynamoNamespace` _string_ | Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) | | | | `dynamoNamespace` _string_ | Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) | | |
| `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | | | `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | | | `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | | | `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | |
| `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the component containers. | | | | `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the component containers. | | |
| `pvc` _[PVC](#pvc)_ | PVC config describing volumes to be mounted by the component. | | | | `volumeMounts` _[VolumeMount](#volumemount) array_ | VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component. | | |
| `ingress` _[IngressSpec](#ingressspec)_ | Ingress config to expose the component outside the cluster (or through a service mesh). | | | | `ingress` _[IngressSpec](#ingressspec)_ | Ingress config to expose the component outside the cluster (or through a service mesh). | | |
| `sharedMemory` _[SharedMemorySpec](#sharedmemoryspec)_ | SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). | | | | `sharedMemory` _[SharedMemorySpec](#sharedmemoryspec)_ | SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). | | |
| `extraPodMetadata` _[ExtraPodMetadata](#extrapodmetadata)_ | ExtraPodMetadata adds labels/annotations to the created Pods. | | | | `extraPodMetadata` _[ExtraPodMetadata](#extrapodmetadata)_ | ExtraPodMetadata adds labels/annotations to the created Pods. | | |
...@@ -121,19 +123,18 @@ _Appears in:_ ...@@ -121,19 +123,18 @@ _Appears in:_
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| `dynamoComponent` _string_ | DynamoComponent selects the Dynamo component from the archive to deploy.<br />Typically corresponds to a component defined in the packaged Dynamo artifacts. | | |
| `dynamoTag` _string_ | contains the tag of the DynamoComponent: for example, "my_package:MyService" | | |
| `backendFramework` _string_ | BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm") | | Enum: [sglang vllm trtllm] <br /> | | `backendFramework` _string_ | BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm") | | Enum: [sglang vllm trtllm] <br /> |
| `annotations` _object (keys:string, values:string)_ | Annotations to add to generated Kubernetes resources for this component<br />(such as Pod, Service, and Ingress when applicable). | | | | `annotations` _object (keys:string, values:string)_ | Annotations to add to generated Kubernetes resources for this component<br />(such as Pod, Service, and Ingress when applicable). | | |
| `labels` _object (keys:string, values:string)_ | Labels to add to generated Kubernetes resources for this component. | | | | `labels` _object (keys:string, values:string)_ | Labels to add to generated Kubernetes resources for this component. | | |
| `serviceName` _string_ | The name of the component | | | | `serviceName` _string_ | The name of the component | | |
| `componentType` _string_ | ComponentType indicates the role of this component (for example, "main"). | | | | `componentType` _string_ | ComponentType indicates the role of this component (for example, "main"). | | |
| `subComponentType` _string_ | SubComponentType indicates the sub-role of this component (for example, "prefill"). | | |
| `dynamoNamespace` _string_ | Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) | | | | `dynamoNamespace` _string_ | Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) | | |
| `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | | | `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | | | `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | | | `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | |
| `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the component containers. | | | | `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the component containers. | | |
| `pvc` _[PVC](#pvc)_ | PVC config describing volumes to be mounted by the component. | | | | `volumeMounts` _[VolumeMount](#volumemount) array_ | VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component. | | |
| `ingress` _[IngressSpec](#ingressspec)_ | Ingress config to expose the component outside the cluster (or through a service mesh). | | | | `ingress` _[IngressSpec](#ingressspec)_ | Ingress config to expose the component outside the cluster (or through a service mesh). | | |
| `sharedMemory` _[SharedMemorySpec](#sharedmemoryspec)_ | SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). | | | | `sharedMemory` _[SharedMemorySpec](#sharedmemoryspec)_ | SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size). | | |
| `extraPodMetadata` _[ExtraPodMetadata](#extrapodmetadata)_ | ExtraPodMetadata adds labels/annotations to the created Pods. | | | | `extraPodMetadata` _[ExtraPodMetadata](#extrapodmetadata)_ | ExtraPodMetadata adds labels/annotations to the created Pods. | | |
...@@ -176,8 +177,9 @@ _Appears in:_ ...@@ -176,8 +177,9 @@ _Appears in:_
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| `dynamoGraph` _string_ | DynamoGraph selects the graph (workflow/topology) to deploy. This must match<br />a graph name packaged with the Dynamo archive. | | | | `pvcs` _[PVC](#pvc) array_ | PVCs defines a list of persistent volume claims that can be referenced by components.<br />Each PVC must have a unique name that can be referenced in component specifications. | | Optional: {} <br /> |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs are environment variables applied to all services in the graph unless<br />overridden by service-specific configuration. | | Optional: {} <br /> | | `services` _object (keys:string, values:[DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec))_ | Services are the services to deploy as part of this deployment. | | Optional: {} <br /> |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs are environment variables applied to all services in the deployment unless<br />overridden by service-specific configuration. | | Optional: {} <br /> |
| `backendFramework` _string_ | BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm"). | | Enum: [sglang vllm trtllm] <br /> | | `backendFramework` _string_ | BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm"). | | Enum: [sglang vllm trtllm] <br /> |
...@@ -266,17 +268,15 @@ _Appears in:_ ...@@ -266,17 +268,15 @@ _Appears in:_
_Appears in:_ _Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec) - [DynamoGraphDeploymentSpec](#dynamographdeploymentspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation | | Field | Description | Default | Validation |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| `create` _boolean_ | Create indicates to create a new PVC | | | | `create` _boolean_ | Create indicates to create a new PVC | | |
| `name` _string_ | Name is the name of the PVC | | | | `name` _string_ | Name is the name of the PVC | | Required: {} <br /> |
| `storageClass` _string_ | StorageClass to be used for PVC creation. Leave it as empty if the PVC is already created. | | | | `storageClass` _string_ | StorageClass to be used for PVC creation. Required when create is true. | | |
| `size` _[Quantity](#quantity)_ | Size of the NIM cache in Gi, used during PVC creation | | | | `size` _[Quantity](#quantity)_ | Size of the volume in Gi, used during PVC creation. Required when create is true. | | |
| `volumeAccessMode` _[PersistentVolumeAccessMode](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#persistentvolumeaccessmode-v1-core)_ | VolumeAccessMode is the volume access mode of the PVC | | | | `volumeAccessMode` _[PersistentVolumeAccessMode](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#persistentvolumeaccessmode-v1-core)_ | VolumeAccessMode is the volume access mode of the PVC. Required when create is true. | | |
| `mountPoint` _string_ | | | |
#### SharedMemorySpec #### SharedMemorySpec
...@@ -297,3 +297,247 @@ _Appears in:_ ...@@ -297,3 +297,247 @@ _Appears in:_
| `size` _[Quantity](#quantity)_ | | | | | `size` _[Quantity](#quantity)_ | | | |
#### VolumeMount
VolumeMount references a PVC defined at the top level for volumes to be mounted by the component
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `name` _string_ | Name references a PVC name defined in the top-level PVCs list | | Required: {} <br /> |
| `mountPoint` _string_ | MountPoint specifies where to mount the volume.<br />If useAsCompilationCache is true and mountPoint is not specified,<br />a backend-specific default will be used. | | |
| `useAsCompilationCache` _boolean_ | UseAsCompilationCache indicates this volume should be used as a compilation cache.<br />When true, backend-specific environment variables will be set and default mount points may be used. | false | |
# Operator Default Values Injection
The Dynamo operator automatically applies default values to various fields when they are not explicitly specified in your deployments. These defaults include:
- **Health Probes**: Startup, liveness, and readiness probes are configured differently for frontend, worker, and planner components. For example, worker components receive a startup probe with a 2-hour timeout (720 failures × 10 seconds) to accommodate long model loading times.
- **Shared Memory**: All components receive an 8Gi shared memory volume mounted at `/dev/shm` by default (can be disabled or resized via the `sharedMemory` field).
- **Environment Variables**: Components automatically receive environment variables like `DYN_NAMESPACE`, `DYN_PARENT_DGD_K8S_NAME`, `DYNAMO_PORT`, and backend-specific variables.
- **Pod Configuration**: Default `terminationGracePeriodSeconds` of 60 seconds and `restartPolicy: Always`.
- **Autoscaling**: When enabled without explicit metrics, defaults to CPU-based autoscaling with 80% target utilization.
- **Backend-Specific Behavior**: For multinode deployments, probes are automatically modified or removed for worker nodes depending on the backend framework (VLLM, SGLang, or TensorRT-LLM).
## Pod Specification Defaults
All components receive the following pod-level defaults unless overridden:
- **`terminationGracePeriodSeconds`**: `60` seconds
- **`restartPolicy`**: `Always`
## Shared Memory Configuration
Shared memory is enabled by default for all components:
- **Enabled**: `true` (unless explicitly disabled via `sharedMemory.disabled`)
- **Size**: `8Gi`
- **Mount Path**: `/dev/shm`
- **Volume Type**: `emptyDir` with `memory` medium
To disable shared memory or customize the size, use the `sharedMemory` field in your component specification.
## Health Probes by Component Type
The operator applies different default health probes based on the component type.
### Frontend Components
Frontend components receive the following probe configurations:
**Liveness Probe:**
- **Type**: HTTP GET
- **Path**: `/health`
- **Port**: `http` (8000)
- **Initial Delay**: 60 seconds
- **Period**: 60 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 10
**Readiness Probe:**
- **Type**: Exec command
- **Command**: `curl -s http://localhost:${DYNAMO_PORT}/health | jq -e ".status == \"healthy\""`
- **Initial Delay**: 60 seconds
- **Period**: 60 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 10
### Worker Components
Worker components receive the following probe configurations:
**Liveness Probe:**
- **Type**: HTTP GET
- **Path**: `/live`
- **Port**: `system` (9090)
- **Period**: 5 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 1
**Readiness Probe:**
- **Type**: HTTP GET
- **Path**: `/health`
- **Port**: `system` (9090)
- **Period**: 10 seconds
- **Timeout**: 30 seconds
- **Failure Threshold**: 60
**Startup Probe:**
- **Type**: HTTP GET
- **Path**: `/live`
- **Port**: `system` (9090)
- **Period**: 10 seconds
- **Timeout**: 5 seconds
- **Failure Threshold**: 720 (allows up to 2 hours for startup: 10s × 720 = 7200s)
:::{note}
For larger models (typically >70B parameters) or slower storage systems, you may need to increase the `failureThreshold` to allow more time for model loading. Calculate the required threshold from your expected startup time, rounding up: `failureThreshold = ceil(expected_startup_seconds / periodSeconds)`. Override the startup probe in your component specification if the default 2-hour window is insufficient.
:::
### Multinode Deployment Probe Modifications
For multinode deployments, the operator modifies probes based on the backend framework and node role:
#### VLLM Backend
- **Worker nodes**: All probes (liveness, readiness, startup) are removed
#### SGLang Backend
- **Worker nodes**: All probes (liveness, readiness, startup) are removed
#### TensorRT-LLM Backend
- **Leader nodes**: All probes remain unchanged
- **Worker nodes**:
  - Liveness and startup probes are removed
  - Readiness probe is replaced with a TCP socket check on SSH port (2222):
    - **Initial Delay**: 20 seconds
    - **Period**: 20 seconds
    - **Timeout**: 5 seconds
    - **Failure Threshold**: 10
## Environment Variables
The operator automatically injects environment variables based on component type and configuration:
### All Components
- **`DYN_NAMESPACE`**: The Dynamo namespace for the component
- **`DYN_PARENT_DGD_K8S_NAME`**: The parent DynamoGraphDeployment Kubernetes resource name
- **`DYN_PARENT_DGD_K8S_NAMESPACE`**: The parent DynamoGraphDeployment Kubernetes namespace
### Frontend Components
- **`DYNAMO_PORT`**: `8000`
- **`DYN_HTTP_PORT`**: `8000`
### Worker Components
- **`DYN_SYSTEM_ENABLED`**: `true`
- **`DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS`**: `["generate"]`
- **`DYN_SYSTEM_PORT`**: `9090`
### Planner Components
- **`PLANNER_PROMETHEUS_PORT`**: `9085`
### VLLM Backend (with compilation cache)
When a volume mount is configured with `useAsCompilationCache: true`:
- **`VLLM_CACHE_ROOT`**: Set to the mount point of the cache volume
## Service Account
Planner components automatically receive the following service account:
- **`serviceAccountName`**: `planner-serviceaccount`
## Image Pull Secrets
The operator automatically discovers and injects image pull secrets for container images. When a component specifies a container image, the operator:
1. Scans all Kubernetes secrets of type `kubernetes.io/dockerconfigjson` in the component's namespace
2. Extracts the docker registry server URLs from each secret's authentication configuration
3. Matches the container image's registry host against the discovered registry URLs
4. Automatically injects matching secrets as `imagePullSecrets` in the pod specification
This eliminates the need to manually specify image pull secrets for each component. The operator maintains an internal index of docker secrets and their associated registries, refreshing this index periodically.
**To disable automatic image pull secret discovery** for a specific component, add the following annotation:
```yaml
annotations:
  nvidia.com/disable-image-pull-secret-discovery: "true"
```
## Autoscaling Defaults
When autoscaling is enabled but no metrics are specified, the operator applies:
- **Default Metric**: CPU utilization
- **Target Average Utilization**: `80%`
## Port Configurations
Default container ports are configured based on component type:
### Frontend Components
- **Port**: 8000
- **Protocol**: TCP
- **Name**: `http`
### Worker Components
- **Port**: 9090
- **Protocol**: TCP
- **Name**: `system`
### Planner Components
- **Port**: 9085
- **Protocol**: TCP
- **Name**: `metrics`
## Backend-Specific Configurations
### VLLM
- **Ray Head Port**: 6379 (for multinode deployments)
### SGLang
- **Distribution Init Port**: 29500 (for multinode deployments)
### TensorRT-LLM
- **SSH Port**: 2222 (for multinode MPI communication)
- **OpenMPI Environment**: `OMPI_MCA_orte_keep_fqdn_hostnames=1`
## Implementation Reference
For users who want to understand the implementation details or contribute to the operator, the default values described in this document are set in the following source files:
- **Health Probes & Pod Specifications**: [`internal/dynamo/graph.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/graph.go) - Contains the main logic for applying default probes, environment variables, shared memory, and pod configurations
- **Component-Specific Defaults**:
  - [`internal/dynamo/component_frontend.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/component_frontend.go)
  - [`internal/dynamo/component_worker.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/component_worker.go)
  - [`internal/dynamo/component_planner.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/component_planner.go)
- **Image Pull Secrets**: [`internal/secrets/docker.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/secrets/docker.go) - Implements the docker secret indexer and automatic discovery
- **Backend-Specific Behavior**:
  - [`internal/dynamo/backend_vllm.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/backend_vllm.go)
  - [`internal/dynamo/backend_sglang.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/backend_sglang.go)
  - [`internal/dynamo/backend_trtllm.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/dynamo/backend_trtllm.go)
- **Constants & Annotations**: [`internal/consts/consts.go`](https://github.com/ai-dynamo/dynamo/blob/main/deploy/cloud/operator/internal/consts/consts.go) - Defines annotation keys and other constants
## Notes
- All these defaults can be overridden by explicitly specifying values in your DynamoComponentDeployment or DynamoGraphDeployment resources
- User-specified probes (via `livenessProbe`, `readinessProbe`, or `startupProbe` fields) take precedence over operator defaults
- For multinode deployments, some defaults are modified or removed as described above to accommodate distributed execution patterns
- The `extraPodSpec.mainContainer` field can be used to override probe configurations set by the operator
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment