# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Used to generate top-level secrets (overridden by custom-values.yaml)

global:
  etcd:
    # -- Whether this chart should install the bundled etcd subchart.
    # When true, deploys etcd and auto-configures the operator with its address.
    # When false, etcd is not deployed. Use dynamo-operator.etcdAddr to point at an external instance if you are bringing your own etcd.
    install: false

  kai-scheduler:
    # -- Whether this chart should install the bundled kai-scheduler subchart.
    # When true, deploys kai-scheduler and its CRDs. Integration is automatically enabled.
    # NOTE: For production environments, it is recommended to install kai-scheduler separately.
    install: false
    # -- Whether to enable Kai Scheduler integration (queue creation, schedulerName injection).
    # Set to true when kai-scheduler is available in the cluster (installed externally).
    # Automatically enabled when install=true. The operator uses this to decide whether to
    # inject schedulerName and queue labels into pod templates.
    enabled: false

  grove:
    # -- Whether this chart should install the bundled Grove subchart.
    # When true, deploys the Grove operator cluster-wide. Integration is automatically enabled.
    # NOTE: For production environments, it is recommended to install Grove separately.
    install: false
    # -- Whether to enable Grove integration (multinode orchestration via PodCliqueSets).
    # Set to true when Grove is available in the cluster (installed externally).
    # Automatically enabled when install=true. The operator uses this to decide whether to
    # create PodCliqueSets for multinode deployments.
    enabled: false

# Subcharts configuration

# Dynamo operator configuration
dynamo-operator:
  # -- Whether to enable the Dynamo Kubernetes operator deployment
  enabled: true

  # -- Whether to manage CRDs via a pre-install/pre-upgrade hook Job.
  # The Job runs the operator image with the crd-apply tool to apply CRDs via server-side apply.
  upgradeCRD: true

  # Environment variables to pass to the operator Deployment.
  env: []

  # -- NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port"
  natsAddr: ""

  # -- etcd server address for an external etcd instance. Only needed when using external etcd without the bundled subchart. Format: "http://hostname:port" or "https://hostname:port"
  etcdAddr: ""

  nats:
    # -- Whether NATS is enabled
    enabled: true

  # -- URL for the Model Express server if not deployed by this helm chart. This is ignored if Model Express server is installed by this helm chart (global.model-express.enabled is true).
  modelExpressURL: ""
  # -- Namespace access controls for the operator
  namespaceRestriction:
    # -- Whether to restrict operator to specific namespaces. By default, the operator will run with cluster-wide permissions. Only 1 instance of the operator should be deployed in the cluster. If you want to deploy multiple operator instances, you can set this to true and specify the target namespace (by default, the target namespace is the helm release namespace).
    enabled: false
    # -- Target namespace for operator deployment (leave empty for current namespace)
    targetNamespace:
    # Namespace scope marker lease configuration (used to prevent conflicts when running both cluster-wide and namespace-restricted operators)
    lease:
      # Duration before the namespace scope marker lease expires if not renewed (namespace-restricted mode only). When a namespace-restricted operator is running, it creates a lease in its namespace. The cluster-wide operator detects this lease and excludes that namespace from processing. If the namespace operator stops renewing the lease (e.g., crashes), the lease expires and the cluster-wide operator automatically resumes processing that namespace.
      duration: 30s
      # Interval for renewing the namespace scope marker lease (namespace-restricted mode only). The namespace-restricted operator renews its lease at this interval to signal it's still running.
      renewInterval: 10s

  # -- GPU discovery configuration (only applies when namespaceRestriction.enabled=true)
  gpuDiscovery:
    # -- Whether to provision a ClusterRole for the namespace-scoped operator to read GPU node labels.
    # When true (default), Helm creates a ClusterRole/ClusterRoleBinding granting node read access.
    # Set to false if your installer lacks ClusterRole creation permissions.
    enabled: true

  # -- The Dynamo discovery backend to use. Default is "kubernetes" for Kubernetes API service discovery. Set to "etcd" to use etcd for discovery.
  discoveryBackend: "kubernetes"

  # Controller manager configuration
  controllerManager:
    # -- Node tolerations for controller manager pods
    tolerations: []

    # -- Affinity for controller manager pods
    affinity: {}

    # Leader election configuration for cluster-wide coordination
    leaderElection:
      # -- Leader election ID for cluster-wide coordination. WARNING: All cluster-wide operators must use the SAME ID to prevent split-brain. Different IDs would allow multiple leaders simultaneously.
      id: ""  # If empty, defaults to: dynamo.nvidia.com (shared across all cluster-wide operators)
      # -- Namespace for leader election leases (only used in cluster-wide mode). If empty, defaults to kube-system for cluster-wide coordination. All cluster-wide operators should use the SAME namespace for proper leader election.
      namespace: ""

    manager:
      # Container image configuration for the operator manager
      image:
        # -- Official NVIDIA Dynamo operator image repository
        repository: "nvcr.io/nvidia/ai-dynamo/kubernetes-operator"
        # -- Image tag (leave empty to use chart default)
        tag: ""
        # -- Image pull policy - when to pull the image
        pullPolicy: IfNotPresent

      # Command line arguments for the operator manager
      args:
        # -- Health probe endpoint for Kubernetes health checks
        - --health-probe-bind-address=:8081
        # -- Metrics endpoint for Prometheus scraping (localhost only for security)
        - --metrics-bind-address=127.0.0.1:8080

  # -- Secrets for pulling private container images
  imagePullSecrets: []

  # Core Dynamo platform configuration
  dynamo:
    # -- How long to wait before forcefully terminating Grove instances
    groveTerminationDelay: 4h

    # Docker registry configuration for private repositories
    dockerRegistry:
      # -- Whether to use Kubernetes secrets for registry authentication
      useKubernetesSecret: false
      # -- Docker registry server URL
      server:
      # -- Registry username
      username:
      # -- Registry password (consider using existingSecretName instead)
      password:
      # -- Name of existing Kubernetes secret containing registry credentials
      existingSecretName:
      # -- Whether the registry uses HTTPS
      secure: true

    # Ingress configuration for external access
    ingress:
      # -- Whether to create ingress resources
      enabled: false
      # -- Ingress class name (e.g., "nginx", "traefik")
      className:
      # -- Secret name containing TLS certificates
      tlsSecretName: my-tls-secret

    # Istio service mesh configuration
    istio:
      # -- Whether to enable Istio integration
      enabled: false
      # -- Istio gateway name for routing
      gateway:

    # -- Host suffix for generated ingress hostnames
    ingressHostSuffix: ""

    # -- Whether VirtualServices should support HTTPS routing
    virtualServiceSupportsHTTPS: false

    # Metrics configuration
    metrics:
      # -- Endpoint that services can use to retrieve metrics. If set, dynamo operator will automatically inject the PROMETHEUS_ENDPOINT environment variable into services it manages. Users can override the value of the PROMETHEUS_ENDPOINT environment variable by modifying the corresponding deployment's environment variables
      prometheusEndpoint: ""

    # MPI Run configuration
    mpiRun:
      # -- Name of the secret containing the SSH key for MPI Run
      secretName: "mpi-run-ssh-secret"

  # Webhook configuration for admission control and validation
  webhook:
    # Certificate configuration for webhook TLS
    certificateSecret:
      # -- Name of the Kubernetes secret containing webhook TLS certificates. The secret must contain three keys: tls.crt (server certificate), tls.key (server private key), and ca.crt (Certificate Authority certificate).
      name: webhook-server-cert

      # -- Whether to manage the certificate secret externally. When false (default), the operator's built-in cert-controller generates and rotates certificates automatically. When true, you must create the secret manually before installing the chart.
      external: false

    # -- CA bundle (base64 encoded) for webhook validation. Only used when certificateSecret.external=true. For automatic certificate generation or cert-manager integration, leave this empty as it will be injected automatically.
    caBundle: ""

    # -- Webhook failure policy controls how Kubernetes handles requests when the webhook is unavailable. 'Fail' (recommended for production) rejects requests if the webhook cannot be reached, ensuring strict validation. 'Ignore' allows requests through if the webhook is unavailable, providing availability over validation guarantees.
    failurePolicy: Fail

    # -- Timeout in seconds for webhook validation calls. If the webhook doesn't respond within this time, the request will be handled according to the failurePolicy.
    timeoutSeconds: 10

    # Namespace selector for webhook scope control
    # -- Custom namespace selector for webhook validation. Use this to include or exclude specific namespaces from webhook validation. For CLUSTER-WIDE operators, you can exclude namespaces managed by namespace-restricted operators by using: matchExpressions: [{ key: "dynamo-operator", operator: "NotIn", values: ["namespace-restricted"] }]. For NAMESPACE-RESTRICTED operators, leave empty as it will be auto-configured to match only the operator's namespace.
    namespaceSelector: {}

    # cert-manager integration for automated certificate lifecycle management
    certManager:
      # -- Whether to use cert-manager for automatic certificate management. Requires cert-manager to be installed in the cluster. When enabled, cert-manager will provision and rotate certificates instead of the operator's built-in cert-controller.
      enabled: false

      # Certificate configuration for cert-manager
      certificate:
        # -- Certificate duration for webhook certificates managed by cert-manager (e.g., "8760h" for 1 year). cert-manager will automatically renew the certificate before it expires.
        duration: "8760h"

        # -- Time before certificate expiration to trigger renewal (e.g., "360h" for 15 days). cert-manager will attempt to renew the certificate when this threshold is reached.
        renewBefore: "360h"

        # Root CA configuration for cert-manager
        rootCA:
          # -- Duration for the root CA certificate (e.g., "87600h" for 10 years). The root CA typically has a much longer lifetime than the leaf certificates it signs.
          duration: "87600h"

          # -- Time before root CA expiration to trigger renewal (e.g., "720h" for 30 days). Renewing a CA can be disruptive as all signed certificates must be reissued.
          renewBefore: "720h"

  # Checkpoint configuration for fast pod restore using CRIU/cuda-checkpoint
  # NOTE: The checkpoint infrastructure (PVC + DaemonSet) must be installed separately
  # using the snapshot Helm chart in each namespace where checkpointing is needed.
  checkpoint:
    # -- Whether to enable checkpoint/restore functionality
    enabled: false

    # -- Path written by worker when model is loaded and ready for checkpointing
    readyForCheckpointFilePath: "/tmp/ready-for-checkpoint"

# Grove component - distributed inference orchestration
# Installation is controlled by global.grove.install above.
grove:
  # -- Node tolerations for Grove pods
  tolerations: []
  # -- Affinity for Grove pods
  affinity: {}

# Kai Scheduler component - advanced workload scheduling
# Installation is controlled by global.kai-scheduler.install above.
# Integration is controlled by global.kai-scheduler.enabled above.
kai-scheduler:
  # Global configuration for kai-scheduler (applies to all components including crd-upgrader)
  global:
    # -- Node tolerations for kai-scheduler pods
    tolerations: []
    # -- Affinity for kai-scheduler pods
    affinity: {}

# etcd configuration - distributed key-value store
# Installation is controlled by global.etcd.install above.
etcd:
  image:
    # -- Following the Bitnami brownout announcement (https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog), use the legacy repository until we migrate to the new "secure" repository
    repository: bitnamilegacy/etcd
    tag: 3.5.18-debian-12-r5

  # Persistent storage configuration for etcd data
  persistence:
    # Whether to enable persistent storage (recommended for production)
    enabled: true
    # Use the cluster default storage-class or override with a named class
    storageClass: null
    # Size of persistent volume for etcd data
    size: 1Gi

  # Pre-upgrade job configuration
  preUpgradeJob:
    # Whether to run pre-upgrade validation jobs
    enabled: false

  # Number of etcd replicas (1 for single-node, 3+ for HA)
  replicaCount: 1

  # Authentication and authorization settings
  # Explicitly remove authentication for simplified internal communication
  auth:
    rbac:
      # Whether to create RBAC authentication (disabled for internal use)
      create: false

  # Health check configuration
  readinessProbe:
    # Whether to enable readiness probes (disabled to reduce startup complexity)
    enabled: false

  livenessProbe:
    # Whether to enable liveness probes (disabled to reduce startup complexity)
    enabled: false

  # Pod Disruption Budget configuration
  # Should be enabled for HA deployments with 3+ replicas
  pdb:
    # Whether to create a PodDisruptionBudget (disabled for single-node deployments)
    create: false

  # Node tolerations for etcd pods (allows scheduling on specific nodes)
  tolerations: []

  # Affinity for etcd pods
  affinity: {}

# NATS configuration - messaging system for operator communication
nats:
  # -- Whether to enable NATS deployment, disable if you want to use an external NATS instance. For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats , all nats settings should be prefixed with "nats."
  enabled: true

  # TLS Certificate Authority configuration for secure communication
  # Reference a common CA Certificate or Bundle in all nats config `tls` blocks and nats-box contexts
  # Note: `tls.verify` still must be set in the appropriate nats config `tls` blocks to require mTLS
  tlsCA:
    # Whether to enable TLS CA configuration
    enabled: false

  # Core NATS server configuration
  config:
    # NATS clustering for high availability (multiple NATS servers)
    cluster:
      # Whether to enable NATS clustering (disabled for single-node setups)
      enabled: false

    # JetStream - persistent messaging and streaming capabilities
    jetstream:
      # Whether to enable JetStream (recommended for persistent messaging)
      enabled: true

      # File-based storage for JetStream streams and consumers
      fileStore:
        # Whether to enable file storage (persistent across restarts)
        enabled: true
        # Directory path for JetStream file storage
        dir: /data

        ############################################################
        # Persistent Volume Claim for JetStream file storage
        ############################################################
        pvc:
          # Whether to create a PVC for JetStream storage
          enabled: true
          # Size of the persistent volume for JetStream data
          size: 10Gi
          # Storage class name (leave empty for default)
          storageClassName:

          # Advanced PVC configuration (merge additional fields)
          # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#persistentvolumeclaim-v1-core
          merge: {}
          patch: []
          # PVC name (defaults to "{{ include "nats.fullname" $ }}-js")
          name:

        # Maximum size for JetStream file storage (defaults to PVC size)
        maxSize:

      # Memory-based storage for JetStream (non-persistent)
      memoryStore:
        # Whether to enable memory storage (faster but not persistent)
        enabled: false

      # Advanced JetStream configuration
      # For options see: https://docs.nats.io/running-a-nats-service/configuration#jetstream
      merge: {}
      patch: []

    # Core NATS server settings
    nats:
      # Port for NATS client connections
      port: 4222

      # TLS configuration for encrypted connections
      tls:
        # Whether to enable TLS encryption
        enabled: false
        # Advanced TLS configuration
        # For options see: https://docs.nats.io/running-a-nats-service/configuration/securing_nats/tls
        merge: {}
        patch: []

    # Leaf nodes for creating NATS topologies and remote connections
    leafnodes:
      # Whether to enable leaf node connections
      enabled: false

    # WebSocket support for browser-based NATS clients
    websocket:
      # Whether to enable WebSocket protocol support
      enabled: false

    # MQTT protocol bridge for IoT device connectivity
    mqtt:
      # Whether to enable MQTT protocol support
      enabled: false

    # Gateway connections for multi-cluster NATS deployments
    gateway:
      # Whether to enable gateway connections
      enabled: false

    # HTTP monitoring endpoint for NATS server metrics
    monitor:
      # Whether to enable HTTP monitoring interface
      enabled: true
      # Port for monitoring HTTP endpoint
      port: 8222

      # TLS configuration for monitoring endpoint
      tls:
        # Whether to enable HTTPS for monitoring (requires config.nats.tls enabled)
        # When enabled, monitoring port will use HTTPS with the options from config.nats.tls
        enabled: false

    # Go pprof profiling endpoint for performance debugging
    profiling:
      # Whether to enable profiling endpoint (for debugging only)
      enabled: false
      # Port for profiling endpoint
      port: 65432

    # Account resolver for multi-tenant NATS deployments
    resolver:
      # Whether to enable account resolution (for advanced multi-tenancy)
      enabled: false

    # Server naming configuration
    # Adds a prefix to the server name, which defaults to the pod name
    # Helpful for ensuring server name is unique in a super cluster
    serverNamePrefix: ""

    # Advanced NATS configuration merging and patching
    # For complete options see: https://docs.nats.io/running-a-nats-service/configuration
    # Special rules apply:
    #  1. strings that start with << and end with >> will be unquoted
    #     use this for variables and numbers with units
    #  2. keys ending in $include will be switched to include directives
    #     keys are sorted alphabetically, use prefix before $includes to control includes ordering
    #     paths should be relative to /etc/nats-config/nats.conf
    # Example:
    #   merge:
    #     $include: ./my-config.conf
    #     zzz$include: ./my-config-last.conf
    #     server_name: nats
    #     authorization:
    #       token: << $TOKEN >>
    #     jetstream:
    #       max_memory_store: << 1GB >>
    merge:
      # 15MB (15 * 1024 * 1024 bytes) to accommodate prompt embeddings: 10MB decoded → ~13.3MB base64-encoded + metadata
      # Also allows larger context: 256K tokens (int32 - 4 bytes each) = 1MB
      max_payload: 15728640
    patch: []

  ############################################################
  # NATS container configuration in StatefulSet
  ############################################################
  container:
    # NATS server container image configuration
    image:
      # Official NATS server repository
      repository: nats
      # NATS server version (Alpine-based for smaller size)
      tag: 2.10.21-alpine
      # Image pull policy (leave empty for chart default)
      pullPolicy:
      # Custom registry URL (leave empty for Docker Hub)
      registry:

    # Container port configuration
    # Note: Ports must also be enabled in the config section above
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#containerport-v1-core
    ports:
      # Main NATS client connection port
      nats: {}
      # Leaf node connection port
      leafnodes: {}
      # WebSocket connection port
      websocket: {}
      # MQTT protocol port
      mqtt: {}
      # Cluster communication port
      cluster: {}
      # Gateway connection port
      gateway: {}
      # HTTP monitoring port
      monitor: {}
      # Go profiling port
      profiling: {}

    # Environment variables for the NATS container
    # Map with key as env var name, value can be string or map
    # Example:
    #   env:
    #     GOMEMLIMIT: 7GiB
    #     TOKEN:
    #       valueFrom:
    #         secretKeyRef:
    #           name: nats-auth
    #           key: token
    env: {}

    # Advanced container configuration merging and patching
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
  # Configuration reloader container for hot config updates
  ############################################################
  reloader:
    # Whether to enable the config reloader sidecar container
    enabled: true

    # Config reloader container image
    image:
      # Official NATS config reloader repository
      repository: natsio/nats-server-config-reloader
      # Config reloader version
      tag: 0.16.0
      # Image pull policy (leave empty for chart default)
      pullPolicy:
      # Custom registry URL (leave empty for Docker Hub)
      registry:

    # Environment variables for the reloader container
    env: {}

    # Volume mount prefixes from NATS container to share with reloader
    # All NATS container volume mounts with these prefixes will be mounted into the reloader
    natsVolumeMountPrefixes:
      - /etc/

    # Advanced reloader container configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
  # Prometheus metrics exporter container (optional)
  ############################################################
  # Note: config.monitor must be enabled for this to work
  promExporter:
    # Whether to enable Prometheus metrics exporter sidecar
    enabled: false

  ############################################################
  # Kubernetes Service for NATS access
  ############################################################
  service:
    # Whether to create a Kubernetes Service for NATS
    enabled: true

    # Service port configuration
    # Additional boolean field 'enabled' controls whether port is exposed in the service
    # Note: Ports must also be enabled in the config section above
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#serviceport-v1-core
    ports:
      # Main NATS client connection port
      nats:
        enabled: true
      # Leaf node connection port
      leafnodes:
        enabled: true
      # WebSocket connection port
      websocket:
        enabled: true
      # MQTT protocol port
      mqtt:
        enabled: true
      # Cluster communication port (typically internal only)
      cluster:
        enabled: false
      # Gateway connection port (typically internal only)
      gateway:
        enabled: false
      # HTTP monitoring port (typically internal only)
      monitor:
        enabled: false
      # Go profiling port (typically internal only)
      profiling:
        enabled: false

    # Advanced service configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []
    # Service name (defaults to "{{ include "nats.fullname" $ }}")
    name:

  ############################################################
  # Advanced NATS Kubernetes resource configuration
  ############################################################

  # StatefulSet configuration for NATS server persistence
  statefulSet:
    # Advanced StatefulSet configuration merging and patching
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#statefulset-v1-apps
    merge: {}
    patch: []
    # StatefulSet name (defaults to "{{ include "nats.fullname" $ }}")
    name:

  # Pod template configuration for NATS StatefulSet
  podTemplate:
    # Whether to add a hash of the ConfigMap as a pod annotation
    # This will cause the StatefulSet to roll when the ConfigMap is updated
    configChecksumAnnotation: true

    # Pod topology spread constraints for better distribution across nodes
    # Map of topologyKey: topologySpreadConstraint
    # labelSelector will be added automatically to match StatefulSet pods
    # Example:
    #   topologySpreadConstraints:
    #     kubernetes.io/hostname:
    #       maxSkew: 1
    topologySpreadConstraints: {}

    # Advanced pod template configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#pod-v1-core
    merge:
      spec:
        # Node tolerations for NATS pods (allows scheduling on specific nodes)
        tolerations: []
        # Affinity for NATS pods
        affinity: {}
    patch: []

  # Headless service for StatefulSet pod discovery
  headlessService:
    # Advanced headless service configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []
    # Headless service name (defaults to "{{ include "nats.fullname" $ }}-headless")
    name:

  # ConfigMap for NATS server configuration
  configMap:
    # Advanced ConfigMap configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#configmap-v1-core
    merge: {}
    patch: []
    # ConfigMap name (defaults to "{{ include "nats.fullname" $ }}-config")
    name:

  # Pod Disruption Budget for controlled rolling updates
  podDisruptionBudget:
    # Whether to create a PodDisruptionBudget (recommended for production)
    enabled: true

  # Service Account for NATS server pods
  serviceAccount:
    # Whether to create and use a dedicated service account
    enabled: false

  ############################################################
  # NATS Box - CLI tools and debugging container
  # NATS Box provides CLI tools for interacting with NATS server
  ############################################################
  natsBox:
    # Whether to deploy NATS Box for CLI access and debugging
    enabled: false

    ############################################################
    # NATS client contexts for authentication and connection
    ############################################################
    contexts:
      # Default context configuration
      default:
        # Credentials-based authentication
        creds:
          # Inline credentials file contents (base64 encoded)
          contents:
          # Name of existing secret containing credentials file
          secretName:
          # Directory to mount credentials (defaults to /etc/nats-creds/<context-name>)
          dir:
          # Key name in secret for credentials file
          key: nats.creds

        # NKey-based authentication (public/private key pairs)
        nkey:
          # Inline NKey file contents (base64 encoded)
          contents:
          # Name of existing secret containing NKey file
          secretName:
          # Directory to mount NKey (defaults to /etc/nats-nkeys/<context-name>)
          dir:
          # Key name in secret for NKey file
          key: nats.nk

        # TLS client certificate authentication
        tls:
          # Name of existing secret containing TLS client certificates
          secretName:
          # Directory to mount certificates (defaults to /etc/nats-certs/<context-name>)
          dir:
          # Certificate file name in secret
          cert: tls.crt
          # Private key file name in secret
          key: tls.key

        # Advanced context configuration
        # For options see: https://docs.nats.io/using-nats/nats-tools/nats_cli#nats-contexts
        merge: {}
        patch: []

    # Name of context to select by default for NATS CLI operations
    defaultContextName: default

    ############################################################
    # NATS Box container configuration
    ############################################################
    container:
      # NATS Box container image
      image:
        # Official NATS Box repository with CLI tools
        repository: natsio/nats-box
        # NATS Box version
        tag: 0.14.5
        # Image pull policy (leave empty for chart default)
        pullPolicy:
        # Custom registry URL (leave empty for Docker Hub)
        registry:

      # Environment variables for NATS Box container
      env: {}

      # Advanced container configuration
      # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
      merge: {}
      patch: []

    # Service Account for NATS Box deployment
    serviceAccount:
      # Whether to create and use a dedicated service account for NATS Box
      enabled: false

    # Pod template configuration for NATS Box deployment
    podTemplate:
      merge:
        spec:
          # Node tolerations for NATS Box pods
          tolerations: []
          # Affinity for NATS Box pods
          affinity: {}
      patch: []