).// Watch DGDs created by this controller (via label)
WithEventFilter(commonController.EphemeralDeploymentEventFilter(r.Config)).// set the event filter to ignore resources handled by other controllers in namespace-restricted mode
).
// Watch DGDs created by this controller (via label)
// Set the event filter to ignore resources handled by other controllers in namespace-restricted mode
@@ -36,153 +36,22 @@ type ExcludedNamespacesInterface interface {
Contains(namespacestring)bool
}
// GroveConfig holds the Grove-related settings.
type GroveConfig struct {
	// Enabled is automatically determined by checking if Grove CRDs are installed in the cluster.
	Enabled bool
	// TerminationDelay configures the termination delay for Grove PodCliqueSets.
	TerminationDelay time.Duration
}
// LWSConfig holds the LWS-related settings.
type LWSConfig struct {
	// Enabled is automatically determined by checking if LWS CRDs are installed in the cluster.
	Enabled bool
}
// KaiSchedulerConfig holds the Kai-scheduler-related settings.
type KaiSchedulerConfig struct {
	// Enabled is automatically determined by checking if Kai-scheduler CRDs are installed in the cluster.
	Enabled bool
}
// MpiRunConfig holds the MPI Run settings.
type MpiRunConfig struct {
	// SecretName is the name of the secret containing the SSH key for MPI Run.
	SecretName string
}
typeConfigstruct{
// Enable resources filtering, only the resources belonging to the given namespace will be handled.
RestrictedNamespacestring
GroveGroveConfig
LWSLWSConfig
KaiSchedulerKaiSchedulerConfig
EtcdAddressstring
NatsAddressstring
IngressConfigIngressConfig
// ModelExpressURL is the URL of the Model Express server to inject into all pods
ModelExpressURLstring
// PrometheusEndpoint is the URL of the Prometheus endpoint to use for metrics
PrometheusEndpointstring
MpiRunMpiRunConfig
// RBAC configuration for cross-namespace resource management
RBACRBACConfig
// ExcludedNamespaces is a thread-safe set of namespaces to exclude (cluster-wide mode only)
ExcludedNamespacesExcludedNamespacesInterface
// DiscoveryBackend is the discovery backend to use. Default is "kubernetes" for Kubernetes API service discovery. Set to "etcd" to use ETCD for discovery.
DiscoveryBackendstring
// GPUDiscoveryEnabled indicates whether Helm provisioned node read access for the namespace-scoped operator.
// Only relevant for namespace-scoped operators (RestrictedNamespace != "").
GPUDiscoveryEnabledbool
// Checkpoint configuration for checkpoint/restore functionality
CheckpointCheckpointConfig
}
// RBACConfig holds configuration for RBAC management.
type RBACConfig struct {
	// PlannerClusterRoleName is the name of the ClusterRole for planner (cluster-wide mode only).
	PlannerClusterRoleName string
	// DGDRProfilingClusterRoleName is the name of the ClusterRole for DGDR profiling jobs (cluster-wide mode only).
	DGDRProfilingClusterRoleName string
	// EPPClusterRoleName is the name of the ClusterRole for EPP (cluster-wide mode only).
	EPPClusterRoleName string
}
// CheckpointConfig holds configuration for checkpoint/restore functionality
typeCheckpointConfigstruct{
// Enabled indicates if checkpoint functionality is enabled
Enabledbool
// Storage holds storage backend configuration
StorageCheckpointStorageConfig
// ReadyForCheckpointFilePath is the file path used to signal model readiness for checkpoint jobs
ReadyForCheckpointFilePathstring
}
// Checkpoint storage type constants.
const (
	// CheckpointStorageTypePVC is the "pvc" storage type value.
	CheckpointStorageTypePVC = "pvc"
	// CheckpointStorageTypeS3 is the "s3" storage type value.
	CheckpointStorageTypeS3 = "s3"
	// CheckpointStorageTypeOCI is the "oci" storage type value.
	CheckpointStorageTypeOCI = "oci"
)
// CheckpointStorageConfig holds storage backend configuration for checkpoints
typeCheckpointStorageConfigstruct{
// Type is the storage backend type: pvc, s3, or oci
| `leaseDuration` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#duration-v1-meta)_ | LeaseDuration is the duration of namespace scope marker lease before expiration | 30s | |
| `leaseRenewInterval` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#duration-v1-meta)_ | LeaseRenewInterval is the interval for renewing namespace scope marker lease | 10s | |
#### OperatorConfiguration
OperatorConfiguration is the Schema for the operator configuration.
WebhookServer extends Server with host and certificate directory.
_Appears in:_
- [ServerConfiguration](#serverconfiguration)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `bindAddress` _string_ | BindAddress is the address the server binds to | | |
| `port` _integer_ | Port is the port the server listens on | | |
| `host` _string_ | Host is the address the webhook server binds to | | |
| `certDir` _string_ | CertDir is the directory containing TLS certificates | | |
# Operator Default Values Injection
The Dynamo operator automatically applies default values to various fields when they are not explicitly specified in your deployments. These defaults include: