# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Used to generate top-level secrets (overridden by custom-values.yaml)

# NOTE(review): the nesting below was reconstructed from key names and section
# comments after the file's line breaks were lost - confirm each section
# against the corresponding subchart's values.yaml before merging.

# Subcharts configuration

# Dynamo operator configuration
dynamo-operator:
  # -- Whether to enable the Dynamo Kubernetes operator deployment
  enabled: true

  # -- NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port"
  natsAddr: ""

  # -- etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port"
  etcdAddr: ""

  # -- URL for the Model Express server if not deployed by this helm chart. This is ignored if Model Express server is installed by this helm chart (global.model-express.enabled is true).
  modelExpressURL: ""

  # -- Namespace access controls for the operator
  namespaceRestriction:
    # -- Whether to restrict operator to specific namespaces
    enabled: true
    # -- Target namespace for operator deployment (leave empty for current namespace)
    targetNamespace:

  # Controller manager configuration
  controllerManager:
    # -- Node tolerations for controller manager pods
    tolerations: []

    manager:
      # Container image configuration for the operator manager
      image:
        # -- Official NVIDIA Dynamo operator image repository
        repository: "nvcr.io/nvidia/ai-dynamo/kubernetes-operator"
        # -- Image tag (leave empty to use chart default)
        tag: ""
        # -- Image pull policy - when to pull the image
        pullPolicy: IfNotPresent

      # Command line arguments for the operator manager
      args:
        # -- Health probe endpoint for Kubernetes health checks
        - --health-probe-bind-address=:8081
        # -- Metrics endpoint for Prometheus scraping (localhost only for security)
        - --metrics-bind-address=127.0.0.1:8080

  # -- Secrets for pulling private container images
  imagePullSecrets: []

  # Core Dynamo platform configuration
  dynamo:
    # -- How long to wait before forcefully terminating Grove instances
    groveTerminationDelay: 15m

    # Internal utility images used by the platform
    internalImages:
      # -- Debugger image for troubleshooting deployments
      debugger: python:3.12-slim

    # -- Whether to enable restricted security contexts for enhanced security
    enableRestrictedSecurityContext: false

    # Docker registry configuration for private repositories
    dockerRegistry:
      # -- Whether to use Kubernetes secrets for registry authentication
      useKubernetesSecret: false
      # -- Docker registry server URL
      server:
      # -- Registry username
      username:
      # -- Registry password (consider using existingSecretName instead)
      password:
      # -- Name of existing Kubernetes secret containing registry credentials
      existingSecretName:
      # -- Whether the registry uses HTTPS
      secure: true

    # Ingress configuration for external access
    ingress:
      # -- Whether to create ingress resources
      enabled: false
      # -- Ingress class name (e.g., "nginx", "traefik")
      className:
      # -- Secret name containing TLS certificates
      tlsSecretName: my-tls-secret

    # Istio service mesh configuration
    istio:
      # -- Whether to enable Istio integration
      enabled: false
      # -- Istio gateway name for routing
      gateway:

    # -- Host suffix for generated ingress hostnames
    ingressHostSuffix: ""

    # -- Whether VirtualServices should support HTTPS routing
    virtualServiceSupportsHTTPS: false

    # Metrics configuration
    metrics:
      # -- Endpoint that services can use to retrieve metrics. If set, dynamo operator will automatically inject the PROMETHEUS_ENDPOINT environment variable into services it manages. Users can override the value of the PROMETHEUS_ENDPOINT environment variable by modifying the corresponding deployment's environment variables
      prometheusEndpoint: ""

# Grove component - distributed inference orchestration
grove:
  # -- Whether to enable Grove for multi-node inference coordination, if enabled, the Grove operator will be deployed cluster-wide
  enabled: false

# Kai Scheduler component - advanced workload scheduling
kai-scheduler:
  # -- Whether to enable Kai Scheduler for intelligent resource allocation, if enabled, the Kai Scheduler operator will be deployed cluster-wide
  enabled: false

# etcd configuration - distributed key-value store for operator state
# For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd
etcd:
  # -- Whether to enable etcd deployment, disable if you want to use an external etcd instance
  enabled: true

  image:
    # -- following bitnami announcement for brownout - https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog, we need to use the legacy repository until we migrate to the new "secure" repository
    repository: bitnamilegacy/etcd
    tag: 3.5.18-debian-12-r5

  # Persistent storage configuration for etcd data
  persistence:
    # Whether to enable persistent storage (recommended for production)
    enabled: true
    # Use the cluster default storage-class or override with a named class
    storageClass: null
    # Size of persistent volume for etcd data
    size: 1Gi

  # Pre-upgrade job configuration
  preUpgrade:
    # Whether to run pre-upgrade validation jobs
    enabled: false

  # Number of etcd replicas (1 for single-node, 3+ for HA)
  replicaCount: 1

  # Authentication and authorization settings
  # Explicitly remove authentication for simplified internal communication
  auth:
    rbac:
      # Whether to create RBAC authentication (disabled for internal use)
      create: false

  # Health check configuration
  readinessProbe:
    # Whether to enable readiness probes (disabled to reduce startup complexity)
    enabled: false

  livenessProbe:
    # Whether to enable liveness probes (disabled to reduce startup complexity)
    enabled: false

  # Node tolerations for etcd pods (allows scheduling on specific nodes)
  tolerations: []

# NATS configuration - messaging system for operator communication
# For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats
nats:
  # -- Whether to enable NATS deployment, disable if you want to use an external NATS instance
  enabled: true

  # TLS Certificate Authority configuration for secure communication
  # Reference a common CA Certificate or Bundle in all nats config `tls` blocks and nats-box contexts
  # Note: `tls.verify` still must be set in the appropriate nats config `tls` blocks to require mTLS
  tlsCA:
    # Whether to enable TLS CA configuration
    enabled: false

  # Core NATS server configuration
  config:
    # NATS clustering for high availability (multiple NATS servers)
    cluster:
      # Whether to enable NATS clustering (disabled for single-node setups)
      enabled: false

    # JetStream - persistent messaging and streaming capabilities
    jetstream:
      # Whether to enable JetStream (recommended for persistent messaging)
      enabled: true

      # File-based storage for JetStream streams and consumers
      fileStore:
        # Whether to enable file storage (persistent across restarts)
        enabled: true
        # Directory path for JetStream file storage
        dir: /data

        ############################################################
        # Persistent Volume Claim for JetStream file storage
        ############################################################
        pvc:
          # Whether to create a PVC for JetStream storage
          enabled: true
          # Size of the persistent volume for JetStream data
          size: 10Gi
          # Storage class name (leave empty for default)
          storageClassName:

          # Advanced PVC configuration (merge additional fields)
          # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#persistentvolumeclaim-v1-core
          merge: {}
          patch: []

          # PVC name (defaults to "{{ include "nats.fullname" $ }}-js")
          name:

        # Maximum size for JetStream file storage (defaults to PVC size)
        maxSize:

      # Memory-based storage for JetStream (non-persistent)
      memoryStore:
        # Whether to enable memory storage (faster but not persistent)
        enabled: false

      # Advanced JetStream configuration
      # For options see: https://docs.nats.io/running-a-nats-service/configuration#jetstream
      merge: {}
      patch: []

    # Core NATS server settings
    nats:
      # Port for NATS client connections
      port: 4222

      # TLS configuration for encrypted connections
      tls:
        # Whether to enable TLS encryption
        enabled: false
        # Advanced TLS configuration
        # For options see: https://docs.nats.io/running-a-nats-service/configuration/securing_nats/tls
        merge: {}
        patch: []

    # Leaf nodes for creating NATS topologies and remote connections
    leafnodes:
      # Whether to enable leaf node connections
      enabled: false

    # WebSocket support for browser-based NATS clients
    websocket:
      # Whether to enable WebSocket protocol support
      enabled: false

    # MQTT protocol bridge for IoT device connectivity
    mqtt:
      # Whether to enable MQTT protocol support
      enabled: false

    # Gateway connections for multi-cluster NATS deployments
    gateway:
      # Whether to enable gateway connections
      enabled: false

    # HTTP monitoring endpoint for NATS server metrics
    monitor:
      # Whether to enable HTTP monitoring interface
      enabled: true
      # Port for monitoring HTTP endpoint
      port: 8222

      # TLS configuration for monitoring endpoint
      tls:
        # Whether to enable HTTPS for monitoring (requires config.nats.tls enabled)
        # When enabled, monitoring port will use HTTPS with the options from config.nats.tls
        enabled: false

    # Go pprof profiling endpoint for performance debugging
    profiling:
      # Whether to enable profiling endpoint (for debugging only)
      enabled: false
      # Port for profiling endpoint
      port: 65432

    # Account resolver for multi-tenant NATS deployments
    resolver:
      # Whether to enable account resolution (for advanced multi-tenancy)
      enabled: false

    # Server naming configuration
    # Adds a prefix to the server name, which defaults to the pod name
    # Helpful for ensuring server name is unique in a super cluster
    serverNamePrefix: ""

    # Advanced NATS configuration merging and patching
    # For complete options see: https://docs.nats.io/running-a-nats-service/configuration
    # Special rules apply:
    #  1. strings that start with << and end with >> will be unquoted
    #     use this for variables and numbers with units
    #  2. keys ending in $include will be switched to include directives
    #     keys are sorted alphabetically, use prefix before $includes to control includes ordering
    #     paths should be relative to /etc/nats-config/nats.conf
    # Example:
    #   merge:
    #     $include: ./my-config.conf
    #     zzz$include: ./my-config-last.conf
    #     server_name: nats
    #     authorization:
    #       token: << $TOKEN >>
    #     jetstream:
    #       max_memory_store: << 1GB >>
    merge: {}
    patch: []

  ############################################################
  # NATS container configuration in StatefulSet
  ############################################################
  container:
    # NATS server container image configuration
    image:
      # Official NATS server repository
      repository: nats
      # NATS server version (Alpine-based for smaller size)
      tag: 2.10.21-alpine
      # Image pull policy (leave empty for chart default)
      pullPolicy:
      # Custom registry URL (leave empty for Docker Hub)
      registry:

    # Container port configuration
    # Note: Ports must also be enabled in the config section above
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#containerport-v1-core
    ports:
      # Main NATS client connection port
      nats: {}
      # Leaf node connection port
      leafnodes: {}
      # WebSocket connection port
      websocket: {}
      # MQTT protocol port
      mqtt: {}
      # Cluster communication port
      cluster: {}
      # Gateway connection port
      gateway: {}
      # HTTP monitoring port
      monitor: {}
      # Go profiling port
      profiling: {}

    # Environment variables for the NATS container
    # Map with key as env var name, value can be string or map
    # Example:
    #   env:
    #     GOMEMLIMIT: 7GiB
    #     TOKEN:
    #       valueFrom:
    #         secretKeyRef:
    #           name: nats-auth
    #           key: token
    env: {}

    # Advanced container configuration merging and patching
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
  # Configuration reloader container for hot config updates
  ############################################################
  reloader:
    # Whether to enable the config reloader sidecar container
    enabled: true

    # Config reloader container image
    image:
      # Official NATS config reloader repository
      repository: natsio/nats-server-config-reloader
      # Config reloader version
      tag: 0.16.0
      # Image pull policy (leave empty for chart default)
      pullPolicy:
      # Custom registry URL (leave empty for Docker Hub)
      registry:

    # Environment variables for the reloader container
    env: {}

    # Volume mount prefixes from NATS container to share with reloader
    # All NATS container volume mounts with these prefixes will be mounted into the reloader
    natsVolumeMountPrefixes:
      - /etc/

    # Advanced reloader container configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
  # Prometheus metrics exporter container (optional)
  ############################################################
  # Note: config.monitor must be enabled for this to work
  promExporter:
    # Whether to enable Prometheus metrics exporter sidecar
    enabled: false

  ############################################################
  # Kubernetes Service for NATS access
  ############################################################
  service:
    # Whether to create a Kubernetes Service for NATS
    enabled: true

    # Service port configuration
    # Additional boolean field 'enabled' controls whether port is exposed in the service
    # Note: Ports must also be enabled in the config section above
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#serviceport-v1-core
    ports:
      # Main NATS client connection port
      nats:
        enabled: true
      # Leaf node connection port
      leafnodes:
        enabled: true
      # WebSocket connection port
      websocket:
        enabled: true
      # MQTT protocol port
      mqtt:
        enabled: true
      # Cluster communication port (typically internal only)
      cluster:
        enabled: false
      # Gateway connection port (typically internal only)
      gateway:
        enabled: false
      # HTTP monitoring port (typically internal only)
      monitor:
        enabled: false
      # Go profiling port (typically internal only)
      profiling:
        enabled: false

    # Advanced service configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []

    # Service name (defaults to "{{ include "nats.fullname" $ }}")
    name:

  ############################################################
  # Advanced NATS Kubernetes resource configuration
  ############################################################

  # StatefulSet configuration for NATS server persistence
  statefulSet:
    # Advanced StatefulSet configuration merging and patching
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#statefulset-v1-apps
    merge: {}
    patch: []

    # StatefulSet name (defaults to "{{ include "nats.fullname" $ }}")
    name:

  # Pod template configuration for NATS StatefulSet
  podTemplate:
    # Whether to add a hash of the ConfigMap as a pod annotation
    # This will cause the StatefulSet to roll when the ConfigMap is updated
    configChecksumAnnotation: true

    # Pod topology spread constraints for better distribution across nodes
    # Map of topologyKey: topologySpreadConstraint
    # labelSelector will be added automatically to match StatefulSet pods
    # Example:
    #   topologySpreadConstraints:
    #     kubernetes.io/hostname:
    #       maxSkew: 1
    topologySpreadConstraints: {}

    # Advanced pod template configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#pod-v1-core
    merge:
      spec:
        # Node tolerations for NATS pods (allows scheduling on specific nodes)
        tolerations: []
    patch: []

  # Headless service for StatefulSet pod discovery
  headlessService:
    # Advanced headless service configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []

    # Headless service name (defaults to "{{ include "nats.fullname" $ }}-headless")
    name:

  # ConfigMap for NATS server configuration
  configMap:
    # Advanced ConfigMap configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#configmap-v1-core
    merge: {}
    patch: []

    # ConfigMap name (defaults to "{{ include "nats.fullname" $ }}-config")
    name:

  # Pod Disruption Budget for controlled rolling updates
  podDisruptionBudget:
    # Whether to create a PodDisruptionBudget (recommended for production)
    enabled: true

  # Service Account for NATS server pods
  serviceAccount:
    # Whether to create and use a dedicated service account
    enabled: false

  ############################################################
  # NATS Box - CLI tools and debugging container
  # NATS Box provides CLI tools for interacting with NATS server
  ############################################################
  natsBox:
    # Whether to deploy NATS Box for CLI access and debugging
    enabled: true

    ############################################################
    # NATS client contexts for authentication and connection
    ############################################################
    contexts:
      # Default context configuration
      default:
        # Credentials-based authentication
        creds:
          # Inline credentials file contents (base64 encoded)
          contents:
          # Name of existing secret containing credentials file
          secretName:
          # Directory to mount credentials (defaults to /etc/nats-creds/)
          dir:
          # Key name in secret for credentials file
          key: nats.creds

        # NKey-based authentication (public/private key pairs)
        nkey:
          # Inline NKey file contents (base64 encoded)
          contents:
          # Name of existing secret containing NKey file
          secretName:
          # Directory to mount NKey (defaults to /etc/nats-nkeys/)
          dir:
          # Key name in secret for NKey file
          key: nats.nk

        # TLS client certificate authentication
        tls:
          # Name of existing secret containing TLS client certificates
          secretName:
          # Directory to mount certificates (defaults to /etc/nats-certs/)
          dir:
          # Certificate file name in secret
          cert: tls.crt
          # Private key file name in secret
          key: tls.key

        # Advanced context configuration
        # For options see: https://docs.nats.io/using-nats/nats-tools/nats_cli#nats-contexts
        merge: {}
        patch: []

    # Name of context to select by default for NATS CLI operations
    defaultContextName: default

    ############################################################
    # NATS Box container configuration
    ############################################################
    container:
      # NATS Box container image
      image:
        # Official NATS Box repository with CLI tools
        repository: natsio/nats-box
        # NATS Box version
        tag: 0.14.5
        # Image pull policy (leave empty for chart default)
        pullPolicy:
        # Custom registry URL (leave empty for Docker Hub)
        registry:

      # Environment variables for NATS Box container
      env: {}

      # Advanced container configuration
      # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
      merge: {}
      patch: []

    # Service Account for NATS Box deployment
    serviceAccount:
      # Whether to create and use a dedicated service account for NATS Box
      enabled: false

    # Pod template configuration for NATS Box deployment
    podTemplate:
      merge:
        spec:
          # Node tolerations for NATS Box pods
          tolerations: []
      patch: []