Unverified Commit ec296823 authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

feat(operator): dynamoNamespace now becomes k8s ns + DGD name (#4126)

parent 1ab2fe1b
......@@ -678,7 +678,9 @@ spec:
description: ComponentType indicates the role of this component (for example, "main").
type: string
dynamoNamespace:
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
description: |-
DynamoNamespace is deprecated and will be removed in a future version.
The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component
type: string
envFromSecret:
description: |-
......@@ -9808,6 +9810,9 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
ingress:
description: Ingress config to expose the component outside the cluster (or through a service mesh).
properties:
......
......@@ -206,6 +206,7 @@ spec:
x-kubernetes-validations:
- message: When create is true, size, storageClass, and volumeAccessMode are required
rule: '!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))'
maxItems: 100
type: array
services:
additionalProperties:
......@@ -812,7 +813,9 @@ spec:
description: ComponentType indicates the role of this component (for example, "main").
type: string
dynamoNamespace:
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
description: |-
DynamoNamespace is deprecated and will be removed in a future version.
The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component
type: string
envFromSecret:
description: |-
......@@ -9942,6 +9945,9 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
ingress:
description: Ingress config to expose the component outside the cluster (or through a service mesh).
properties:
......@@ -10404,6 +10410,7 @@ spec:
type: array
type: object
description: Services are the services to deploy as part of this deployment.
maxProperties: 25
type: object
type: object
status:
......
......@@ -64,9 +64,14 @@ type DynamoComponentDeploymentSharedSpec struct {
// SubComponentType indicates the sub-role of this component (for example, "prefill").
SubComponentType string `json:"subComponentType,omitempty"`
// Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
// DynamoNamespace is deprecated and will be removed in a future version.
// The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component
// +kubebuilder:validation:Optional
DynamoNamespace *string `json:"dynamoNamespace,omitempty"`
// GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
GlobalDynamoNamespace bool `json:"globalDynamoNamespace,omitempty"`
// Resources requested and limits for this component, including CPU, memory,
// GPUs/devices, and any runtime-specific resources.
Resources *dynamoCommon.Resources `json:"resources,omitempty"`
......
......@@ -32,9 +32,11 @@ type DynamoGraphDeploymentSpec struct {
// PVCs defines a list of persistent volume claims that can be referenced by components.
// Each PVC must have a unique name that can be referenced in component specifications.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:MaxItems=100
PVCs []PVC `json:"pvcs,omitempty"`
// Services are the services to deploy as part of this deployment.
// +kubebuilder:validation:Optional
// +kubebuilder:validation:MaxProperties=25
Services map[string]*DynamoComponentDeploymentSharedSpec `json:"services,omitempty"`
// Envs are environment variables applied to all services in the deployment unless
// overridden by service-specific configuration.
......
......@@ -678,7 +678,9 @@ spec:
description: ComponentType indicates the role of this component (for example, "main").
type: string
dynamoNamespace:
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
description: |-
DynamoNamespace is deprecated and will be removed in a future version.
The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component
type: string
envFromSecret:
description: |-
......@@ -9808,6 +9810,9 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
ingress:
description: Ingress config to expose the component outside the cluster (or through a service mesh).
properties:
......
......@@ -206,6 +206,7 @@ spec:
x-kubernetes-validations:
- message: When create is true, size, storageClass, and volumeAccessMode are required
rule: '!has(self.create) || self.create == false || (has(self.size) && has(self.storageClass) && has(self.volumeAccessMode))'
maxItems: 100
type: array
services:
additionalProperties:
......@@ -812,7 +813,9 @@ spec:
description: ComponentType indicates the role of this component (for example, "main").
type: string
dynamoNamespace:
description: Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive)
description: |-
DynamoNamespace is deprecated and will be removed in a future version.
The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component
type: string
envFromSecret:
description: |-
......@@ -9942,6 +9945,9 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
ingress:
description: Ingress config to expose the component outside the cluster (or through a service mesh).
properties:
......@@ -10404,6 +10410,7 @@ spec:
type: array
type: object
description: Services are the services to deploy as part of this deployment.
maxProperties: 25
type: object
type: object
status:
......
......@@ -48,6 +48,8 @@ const (
DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"
GlobalDynamoNamespace = "dynamo"
ComponentTypePlanner = "planner"
ComponentTypeFrontend = "frontend"
ComponentTypeWorker = "worker"
......
......@@ -106,10 +106,6 @@ func (s ServiceConfig) GetNamespace() *string {
return &s.Config.Dynamo.Namespace
}
func GetDefaultDynamoNamespace(dynamoDeployment *v1alpha1.DynamoGraphDeployment) string {
return fmt.Sprintf("dynamo-%s", dynamoDeployment.Name)
}
func ParseDynDeploymentConfig(ctx context.Context, jsonContent []byte) (DynDeploymentConfig, error) {
var config DynDeploymentConfig
err := json.Unmarshal(jsonContent, &config)
......@@ -119,25 +115,22 @@ func ParseDynDeploymentConfig(ctx context.Context, jsonContent []byte) (DynDeplo
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphDeployment *v1alpha1.DynamoGraphDeployment, defaultIngressSpec *v1alpha1.IngressSpec) (map[string]*v1alpha1.DynamoComponentDeployment, error) {
deployments := make(map[string]*v1alpha1.DynamoComponentDeployment)
graphDynamoNamespace, err := getDynamoNamespace(parentDynamoGraphDeployment)
if err != nil {
return nil, fmt.Errorf("failed to get the graph dynamo namespace: %w", err)
}
for componentName, component := range parentDynamoGraphDeployment.Spec.Services {
dynamoNamespace := getDynamoNamespace(parentDynamoGraphDeployment, component)
deployment := &v1alpha1.DynamoComponentDeployment{}
deployment.Spec.DynamoComponentDeploymentSharedSpec = *component
deployment.Name = GetDynamoComponentName(parentDynamoGraphDeployment, componentName)
deployment.Spec.BackendFramework = parentDynamoGraphDeployment.Spec.BackendFramework
deployment.Namespace = parentDynamoGraphDeployment.Namespace
deployment.Spec.ServiceName = componentName
deployment.Spec.DynamoNamespace = &graphDynamoNamespace
deployment.Spec.DynamoNamespace = &dynamoNamespace
labels := make(map[string]string)
// add the labels in the spec in order to label all sub-resources
deployment.Spec.Labels = labels
// and add the labels to the deployment itself
deployment.Labels = labels
labels[commonconsts.KubeLabelDynamoComponent] = componentName
labels[commonconsts.KubeLabelDynamoNamespace] = graphDynamoNamespace
labels[commonconsts.KubeLabelDynamoNamespace] = dynamoNamespace
labels[commonconsts.KubeLabelDynamoGraphDeploymentName] = parentDynamoGraphDeployment.Name
// Propagate metrics annotation from parent deployment if present
......@@ -187,22 +180,11 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
return deployments, nil
}
func getDynamoNamespace(parentDynamoGraphDeployment *v1alpha1.DynamoGraphDeployment) (string, error) {
graphDynamoNamespace := ""
for componentName, component := range parentDynamoGraphDeployment.Spec.Services {
dynamoNamespace := ""
if component.DynamoNamespace != nil && *component.DynamoNamespace != "" {
dynamoNamespace = *component.DynamoNamespace
func getDynamoNamespace(object metav1.Object, service *v1alpha1.DynamoComponentDeploymentSharedSpec) string {
if service.GlobalDynamoNamespace {
return commonconsts.GlobalDynamoNamespace
}
if graphDynamoNamespace != "" && graphDynamoNamespace != dynamoNamespace {
return "", fmt.Errorf("namespace mismatch for component %s: graph uses namespace %s but component specifies %s", componentName, graphDynamoNamespace, dynamoNamespace)
}
graphDynamoNamespace = dynamoNamespace
}
if graphDynamoNamespace == "" {
graphDynamoNamespace = GetDefaultDynamoNamespace(parentDynamoGraphDeployment)
}
return graphDynamoNamespace, nil
return fmt.Sprintf("%s-%s", object.GetNamespace(), object.GetName())
}
// updateDynDeploymentConfig updates the runtime config object for the given dynamoDeploymentComponent
......@@ -933,12 +915,9 @@ func GenerateGrovePodCliqueSet(
}
}
dynamoNamespace, err := getDynamoNamespace(dynamoDeployment)
if err != nil {
return nil, fmt.Errorf("failed to get the graph dynamo namespace: %w", err)
}
var scalingGroups []grovev1alpha1.PodCliqueScalingGroupConfig
for serviceName, component := range dynamoDeployment.Spec.Services {
dynamoNamespace := getDynamoNamespace(dynamoDeployment, component)
component.DynamoNamespace = &dynamoNamespace
// Determine backend framework using hybrid approach
backendFramework, err := getBackendFrameworkFromComponent(component, dynamoDeployment)
......
......@@ -77,7 +77,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `name` _string_ | Name of the ConfigMap containing the desired data. | | Required: {} <br /> |
| `name` _string_ | Name of the ConfigMap containing the desired data. | | Required: \{\} <br /> |
| `key` _string_ | Key in the ConfigMap to select. If not specified, defaults to "disagg.yaml". | disagg.yaml | |
......@@ -95,11 +95,11 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `name` _string_ | Name is the desired name for the created DynamoGraphDeployment.<br />If not specified, defaults to the DGDR name. | | Optional: {} <br /> |
| `namespace` _string_ | Namespace is the desired namespace for the created DynamoGraphDeployment.<br />If not specified, defaults to the DGDR namespace. | | Optional: {} <br /> |
| `labels` _object (keys:string, values:string)_ | Labels are additional labels to add to the DynamoGraphDeployment metadata.<br />These are merged with auto-generated labels from the profiling process. | | Optional: {} <br /> |
| `annotations` _object (keys:string, values:string)_ | Annotations are additional annotations to add to the DynamoGraphDeployment metadata. | | Optional: {} <br /> |
| `workersImage` _string_ | WorkersImage specifies the container image to use for DynamoGraphDeployment worker components.<br />This image is used for both temporary DGDs created during online profiling and the final DGD.<br />If omitted, the image from the base config file (e.g., disagg.yaml) is used.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Optional: {} <br /> |
| `name` _string_ | Name is the desired name for the created DynamoGraphDeployment.<br />If not specified, defaults to the DGDR name. | | Optional: \{\} <br /> |
| `namespace` _string_ | Namespace is the desired namespace for the created DynamoGraphDeployment.<br />If not specified, defaults to the DGDR namespace. | | Optional: \{\} <br /> |
| `labels` _object (keys:string, values:string)_ | Labels are additional labels to add to the DynamoGraphDeployment metadata.<br />These are merged with auto-generated labels from the profiling process. | | Optional: \{\} <br /> |
| `annotations` _object (keys:string, values:string)_ | Annotations are additional annotations to add to the DynamoGraphDeployment metadata. | | Optional: \{\} <br /> |
| `workersImage` _string_ | WorkersImage specifies the container image to use for DynamoGraphDeployment worker components.<br />This image is used for both temporary DGDs created during online profiling and the final DGD.<br />If omitted, the image from the base config file (e.g., disagg.yaml) is used.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Optional: \{\} <br /> |
#### DeploymentStatus
......@@ -159,7 +159,8 @@ _Appears in:_
| `serviceName` _string_ | The name of the component | | |
| `componentType` _string_ | ComponentType indicates the role of this component (for example, "main"). | | |
| `subComponentType` _string_ | SubComponentType indicates the sub-role of this component (for example, "prefill"). | | |
| `dynamoNamespace` _string_ | Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) | | |
| `dynamoNamespace` _string_ | DynamoNamespace is deprecated and will be removed in a future version.<br />The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component | | Optional: \{\} <br /> |
| `globalDynamoNamespace` _boolean_ | GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace | | |
| `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | |
......@@ -194,7 +195,8 @@ _Appears in:_
| `serviceName` _string_ | The name of the component | | |
| `componentType` _string_ | ComponentType indicates the role of this component (for example, "main"). | | |
| `subComponentType` _string_ | SubComponentType indicates the sub-role of this component (for example, "prefill"). | | |
| `dynamoNamespace` _string_ | Dynamo namespace of the service (allows to override the Dynamo namespace of the service defined in annotations inside the Dynamo archive) | | |
| `dynamoNamespace` _string_ | DynamoNamespace is deprecated and will be removed in a future version.<br />The DGD Kubernetes namespace and DynamoGraphDeployment name are used to construct the Dynamo namespace for each component | | Optional: \{\} <br /> |
| `globalDynamoNamespace` _boolean_ | GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace | | |
| `resources` _[Resources](#resources)_ | Resources requested and limits for this component, including CPU, memory,<br />GPUs/devices, and any runtime-specific resources. | | |
| `autoscaling` _[Autoscaling](#autoscaling)_ | Autoscaling config for this component (replica range, target utilization, etc.). | | |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables to inject into the component containers. | | |
......@@ -237,7 +239,6 @@ DynamoGraphDeploymentRequest is the Schema for the dynamographdeploymentrequests
It serves as the primary interface for users to request model deployments with
specific performance and resource constraints, enabling SLA-driven deployments.
Lifecycle:
1. Initial → Pending: Validates spec and prepares for profiling
2. Pending → Profiling: Creates and runs profiling job (online or AIC)
......@@ -246,7 +247,6 @@ Lifecycle:
5. Ready: Terminal state when DGD is operational or spec is available
6. DeploymentDeleted: Terminal state when auto-created DGD is manually deleted
The spec becomes immutable once profiling starts. Users must delete and recreate
the DGDR to modify configuration after this point.
......@@ -278,12 +278,12 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `model` _string_ | Model specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b").<br />This is a high-level identifier for easy reference in kubectl output and logs.<br />The controller automatically sets this value in profilingConfig.config.deployment.model. | | Required: {} <br /> |
| `backend` _string_ | Backend specifies the inference backend to use.<br />The controller automatically sets this value in profilingConfig.config.engine.backend. | | Enum: [vllm sglang trtllm] <br />Required: {} <br /> |
| `enableGpuDiscovery` _boolean_ | EnableGpuDiscovery controls whether the profiler should automatically discover GPU<br />resources from the Kubernetes cluster nodes. When enabled, the profiler will override<br />any manually specified hardware configuration (min_num_gpus_per_engine, max_num_gpus_per_engine,<br />num_gpus_per_node) with values detected from the cluster.<br />Requires cluster-wide node access permissions - only available with cluster-scoped operators. | false | Optional: {} <br /> |
| `profilingConfig` _[ProfilingConfigSpec](#profilingconfigspec)_ | ProfilingConfig provides the complete configuration for the profiling job.<br />This configuration is passed directly to the profiler.<br />The structure matches the profile_sla config format exactly (see ProfilingConfigSpec for schema).<br />Note: deployment.model and engine.backend are automatically set from the high-level<br />modelName and backend fields and should not be specified in this config. | | Required: {} <br /> |
| `model` _string_ | Model specifies the model to deploy (e.g., "Qwen/Qwen3-0.6B", "meta-llama/Llama-3-70b").<br />This is a high-level identifier for easy reference in kubectl output and logs.<br />The controller automatically sets this value in profilingConfig.config.deployment.model. | | Required: \{\} <br /> |
| `backend` _string_ | Backend specifies the inference backend to use.<br />The controller automatically sets this value in profilingConfig.config.engine.backend. | | Enum: [vllm sglang trtllm] <br />Required: \{\} <br /> |
| `enableGpuDiscovery` _boolean_ | EnableGpuDiscovery controls whether the profiler should automatically discover GPU<br />resources from the Kubernetes cluster nodes. When enabled, the profiler will override<br />any manually specified hardware configuration (min_num_gpus_per_engine, max_num_gpus_per_engine,<br />num_gpus_per_node) with values detected from the cluster.<br />Requires cluster-wide node access permissions - only available with cluster-scoped operators. | false | Optional: \{\} <br /> |
| `profilingConfig` _[ProfilingConfigSpec](#profilingconfigspec)_ | ProfilingConfig provides the complete configuration for the profiling job.<br />This configuration is passed directly to the profiler.<br />The structure matches the profile_sla config format exactly (see ProfilingConfigSpec for schema).<br />Note: deployment.model and engine.backend are automatically set from the high-level<br />modelName and backend fields and should not be specified in this config. | | Required: \{\} <br /> |
| `autoApply` _boolean_ | AutoApply indicates whether to automatically create a DynamoGraphDeployment<br />after profiling completes. If false, only the spec is generated and stored in status.<br />Users can then manually create a DGD using the generated spec. | false | |
| `deploymentOverrides` _[DeploymentOverridesSpec](#deploymentoverridesspec)_ | DeploymentOverrides allows customizing metadata for the auto-created DGD.<br />Only applicable when AutoApply is true. | | Optional: {} <br /> |
| `deploymentOverrides` _[DeploymentOverridesSpec](#deploymentoverridesspec)_ | DeploymentOverrides allows customizing metadata for the auto-created DGD.<br />Only applicable when AutoApply is true. | | Optional: \{\} <br /> |
#### DynamoGraphDeploymentRequestStatus
......@@ -301,12 +301,12 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `state` _string_ | State is a high-level textual status of the deployment request lifecycle.<br />Possible values: "", "Pending", "Profiling", "Deploying", "Ready", "DeploymentDeleted", "Failed"<br />Empty string ("") represents the initial state before initialization. | | |
| `backend` _string_ | Backend is extracted from profilingConfig.config.engine.backend for display purposes.<br />This field is populated by the controller and shown in kubectl output. | | Optional: {} <br /> |
| `backend` _string_ | Backend is extracted from profilingConfig.config.engine.backend for display purposes.<br />This field is populated by the controller and shown in kubectl output. | | Optional: \{\} <br /> |
| `observedGeneration` _integer_ | ObservedGeneration reflects the generation of the most recently observed spec.<br />Used to detect spec changes and enforce immutability after profiling starts. | | |
| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#condition-v1-meta) array_ | Conditions contains the latest observed conditions of the deployment request.<br />Standard condition types include: Validation, Profiling, SpecGenerated, DeploymentReady.<br />Conditions are merged by type on patch updates. | | |
| `profilingResults` _string_ | ProfilingResults contains a reference to the ConfigMap holding profiling data.<br />Format: "configmap/<name>" | | Optional: {} <br /> |
| `generatedDeployment` _[RawExtension](#rawextension)_ | GeneratedDeployment contains the full generated DynamoGraphDeployment specification<br />including metadata, based on profiling results. Users can extract this to create<br />a DGD manually, or it's used automatically when autoApply is true.<br />Stored as RawExtension to preserve all fields including metadata. | | EmbeddedResource: {} <br />Optional: {} <br /> |
| `deployment` _[DeploymentStatus](#deploymentstatus)_ | Deployment tracks the auto-created DGD when AutoApply is true.<br />Contains name, namespace, state, and creation status of the managed DGD. | | Optional: {} <br /> |
| `profilingResults` _string_ | ProfilingResults contains a reference to the ConfigMap holding profiling data.<br />Format: "configmap/<name>" | | Optional: \{\} <br /> |
| `generatedDeployment` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#rawextension-runtime-pkg)_ | GeneratedDeployment contains the full generated DynamoGraphDeployment specification<br />including metadata, based on profiling results. Users can extract this to create<br />a DGD manually, or it's used automatically when autoApply is true.<br />Stored as RawExtension to preserve all fields including metadata. | | EmbeddedResource: \{\} <br />Optional: \{\} <br /> |
| `deployment` _[DeploymentStatus](#deploymentstatus)_ | Deployment tracks the auto-created DGD when AutoApply is true.<br />Contains name, namespace, state, and creation status of the managed DGD. | | Optional: \{\} <br /> |
#### DynamoGraphDeploymentSpec
......@@ -322,9 +322,9 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `pvcs` _[PVC](#pvc) array_ | PVCs defines a list of persistent volume claims that can be referenced by components.<br />Each PVC must have a unique name that can be referenced in component specifications. | | Optional: {} <br /> |
| `services` _object (keys:string, values:[DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec))_ | Services are the services to deploy as part of this deployment. | | Optional: {} <br /> |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs are environment variables applied to all services in the deployment unless<br />overridden by service-specific configuration. | | Optional: {} <br /> |
| `pvcs` _[PVC](#pvc) array_ | PVCs defines a list of persistent volume claims that can be referenced by components.<br />Each PVC must have a unique name that can be referenced in component specifications. | | MaxItems: 100 <br />Optional: \{\} <br /> |
| `services` _object (keys:string, values:[DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec))_ | Services are the services to deploy as part of this deployment. | | MaxProperties: 25 <br />Optional: \{\} <br /> |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs are environment variables applied to all services in the deployment unless<br />overridden by service-specific configuration. | | Optional: \{\} <br /> |
| `backendFramework` _string_ | BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm"). | | Enum: [sglang vllm trtllm] <br /> |
......@@ -418,9 +418,9 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `create` _boolean_ | Create indicates to create a new PVC | | |
| `name` _string_ | Name is the name of the PVC | | Required: {} <br /> |
| `name` _string_ | Name is the name of the PVC | | Required: \{\} <br /> |
| `storageClass` _string_ | StorageClass to be used for PVC creation. Required when create is true. | | |
| `size` _[Quantity](#quantity)_ | Size of the volume in Gi, used during PVC creation. Required when create is true. | | |
| `size` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#quantity-resource-api)_ | Size of the volume in Gi, used during PVC creation. Required when create is true. | | |
| `volumeAccessMode` _[PersistentVolumeAccessMode](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#persistentvolumeaccessmode-v1-core)_ | VolumeAccessMode is the volume access mode of the PVC. Required when create is true. | | |
......@@ -439,9 +439,9 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `config` _[JSON](#json)_ | Config is the profiling configuration as arbitrary JSON/YAML. This will be passed directly to the profiler.<br />The profiler will validate the configuration and report any errors. | | Optional: {} <br />Type: object <br /> |
| `configMapRef` _[ConfigMapKeySelector](#configmapkeyselector)_ | ConfigMapRef is an optional reference to a ConfigMap containing the DynamoGraphDeployment<br />base config file (disagg.yaml). This is separate from the profiling config above.<br />The path to this config will be set as engine.config in the profiling config. | | Optional: {} <br /> |
| `profilerImage` _string_ | ProfilerImage specifies the container image to use for profiling jobs.<br />This image contains the profiler code and dependencies needed for SLA-based profiling.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Required: {} <br /> |
| `config` _[JSON](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#json-v1-apiextensions-k8s-io)_ | Config is the profiling configuration as arbitrary JSON/YAML. This will be passed directly to the profiler.<br />The profiler will validate the configuration and report any errors. | | Optional: \{\} <br />Type: object <br /> |
| `configMapRef` _[ConfigMapKeySelector](#configmapkeyselector)_ | ConfigMapRef is an optional reference to a ConfigMap containing the DynamoGraphDeployment<br />base config file (disagg.yaml). This is separate from the profiling config above.<br />The path to this config will be set as engine.config in the profiling config. | | Optional: \{\} <br /> |
| `profilerImage` _string_ | ProfilerImage specifies the container image to use for profiling jobs.<br />This image contains the profiler code and dependencies needed for SLA-based profiling.<br />Example: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.6.1" | | Required: \{\} <br /> |
#### SharedMemorySpec
......@@ -459,7 +459,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `disabled` _boolean_ | | | |
| `size` _[Quantity](#quantity)_ | | | |
| `size` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#quantity-resource-api)_ | | | |
#### VolumeMount
......@@ -476,7 +476,7 @@ _Appears in:_
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `name` _string_ | Name references a PVC name defined in the top-level PVCs map | | Required: {} <br /> |
| `name` _string_ | Name references a PVC name defined in the top-level PVCs map | | Required: \{\} <br /> |
| `mountPoint` _string_ | MountPoint specifies where to mount the volume.<br />If useAsCompilationCache is true and mountPoint is not specified,<br />a backend-specific default will be used. | | |
| `useAsCompilationCache` _boolean_ | UseAsCompilationCache indicates this volume should be used as a compilation cache.<br />When true, backend-specific environment variables will be set and default mount points may be used. | false | |
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment