# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Used to generate top-level secrets (overridden by custom-values.yaml)

global:
  etcd:
    # -- Whether this chart should install the bundled etcd subchart.
    # When true, deploys etcd and auto-configures the operator with its address.
    # When false, etcd is not deployed. Use dynamo-operator.etcdAddr to point at an external instance if you are bringing your own etcd.
    install: false

  kai-scheduler:
    # -- Whether this chart should install the bundled kai-scheduler subchart.
    # When true, deploys kai-scheduler and its CRDs. Integration is automatically enabled.
    # NOTE: For production environments, it is recommended to install kai-scheduler separately.
    install: false
    # -- Whether to enable Kai Scheduler integration (queue creation, schedulerName injection).
    # Set to true when kai-scheduler is available in the cluster (installed externally).
    # Automatically enabled when install=true. The operator uses this to decide whether to
    # inject schedulerName and queue labels into pod templates.
    enabled: false

  grove:
    # -- Whether this chart should install the bundled Grove subchart.
    # When true, deploys the Grove operator cluster-wide. Integration is automatically enabled.
    # NOTE: For production environments, it is recommended to install Grove separately.
    install: false
    # -- Whether to enable Grove integration (multinode orchestration via PodCliqueSets).
    # Set to true when Grove is available in the cluster (installed externally).
    # Automatically enabled when install=true. The operator uses this to decide whether to
    # create PodCliqueSets for multinode deployments.
    enabled: false

# Subcharts configuration

# Dynamo operator configuration
dynamo-operator:
  # -- Whether to enable the Dynamo Kubernetes operator deployment
  enabled: true

  # -- Whether to manage CRDs via a pre-install/pre-upgrade hook Job.
  # The Job runs the operator image with the crd-apply tool to apply CRDs via server-side apply.
  upgradeCRD: true

  # -- NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port"
  natsAddr: ""

  # -- etcd server address for an external etcd instance. Only needed when using external etcd without the bundled subchart. Format: "http://hostname:port" or "https://hostname:port"
  etcdAddr: ""

  nats:
    # -- Whether NATS is enabled
    enabled: true

  # -- URL for the Model Express server if not deployed by this helm chart.
  # This is ignored if the Model Express server is installed by this helm chart (global.model-express.enabled is true).
  modelExpressURL: ""
  # -- Namespace access controls for the operator
  namespaceRestriction:
    # -- Whether to restrict operator to specific namespaces. By default, the operator will run with cluster-wide permissions. Only 1 instance of the operator should be deployed in the cluster. If you want to deploy multiple operator instances, you can set this to true and specify the target namespace (by default, the target namespace is the helm release namespace).
    enabled: false
    # -- Target namespace for operator deployment (leave empty for current namespace)
    targetNamespace:
    # Namespace scope marker lease configuration (used to prevent conflicts when running both cluster-wide and namespace-restricted operators)
    lease:
      # Duration before the namespace scope marker lease expires if not renewed (namespace-restricted mode only). When a namespace-restricted operator is running, it creates a lease in its namespace. The cluster-wide operator detects this lease and excludes that namespace from processing. If the namespace operator stops renewing the lease (e.g., crashes), the lease expires and the cluster-wide operator automatically resumes processing that namespace.
      duration: 30s
      # Interval for renewing the namespace scope marker lease (namespace-restricted mode only). The namespace-restricted operator renews its lease at this interval to signal it's still running.
      renewInterval: 10s

  # -- GPU discovery configuration (only applies when namespaceRestriction.enabled=true)
  gpuDiscovery:
    # -- Whether to provision a ClusterRole for the namespace-scoped operator to read GPU node labels.
    # When true (default), Helm creates a ClusterRole/ClusterRoleBinding granting node read access.
    # Set to false if your installer lacks ClusterRole creation permissions.
    enabled: true

  # -- The Dynamo discovery backend to use. Default is "kubernetes" for Kubernetes API service discovery. Set to "etcd" to use etcd for discovery.
  discoveryBackend: "kubernetes"

  # Controller manager configuration
  controllerManager:
    # -- Node tolerations for controller manager pods
    tolerations: []

    # -- Affinity for controller manager pods
    affinity: {}

    # Leader election configuration for cluster-wide coordination
    leaderElection:
      # -- Leader election ID for cluster-wide coordination. WARNING: All cluster-wide operators must use the SAME ID to prevent split-brain. Different IDs would allow multiple leaders simultaneously.
      id: ""  # If empty, defaults to: dynamo.nvidia.com (shared across all cluster-wide operators)
      # -- Namespace for leader election leases (only used in cluster-wide mode). If empty, defaults to kube-system for cluster-wide coordination. All cluster-wide operators should use the SAME namespace for proper leader election.
      namespace: ""

    manager:
      # Container image configuration for the operator manager
      image:
        # -- Official NVIDIA Dynamo operator image repository
        repository: "nvcr.io/nvidia/ai-dynamo/kubernetes-operator"
        # -- Image tag (leave empty to use chart default)
        tag: ""
        # -- Image pull policy - when to pull the image
        pullPolicy: IfNotPresent

      # Command line arguments for the operator manager
      args:
        # -- Health probe endpoint for Kubernetes health checks
        - --health-probe-bind-address=:8081
        # -- Metrics endpoint for Prometheus scraping (localhost only for security)
        - --metrics-bind-address=127.0.0.1:8080

  # -- Secrets for pulling private container images
  imagePullSecrets: []

  # Core Dynamo platform configuration
  dynamo:
    # -- How long to wait before forcefully terminating Grove instances
    groveTerminationDelay: 4h

    # Docker registry configuration for private repositories
    dockerRegistry:
      # -- Whether to use Kubernetes secrets for registry authentication
      useKubernetesSecret: false
      # -- Docker registry server URL
      server:
      # -- Registry username
      username:
      # -- Registry password (consider using existingSecretName instead)
      password:
      # -- Name of existing Kubernetes secret containing registry credentials
      existingSecretName:
      # -- Whether the registry uses HTTPS
      secure: true

    # Ingress configuration for external access
    ingress:
      # -- Whether to create ingress resources
      enabled: false
      # -- Ingress class name (e.g., "nginx", "traefik")
      className:
      # -- Secret name containing TLS certificates
      tlsSecretName: my-tls-secret

    # Istio service mesh configuration
    istio:
      # -- Whether to enable Istio integration
      enabled: false
      # -- Istio gateway name for routing
      gateway:

    # -- Host suffix for generated ingress hostnames
    ingressHostSuffix: ""

    # -- Whether VirtualServices should support HTTPS routing
    virtualServiceSupportsHTTPS: false

    # Metrics configuration
    metrics:
      # -- Endpoint that services can use to retrieve metrics. If set, dynamo operator will automatically inject the PROMETHEUS_ENDPOINT environment variable into services it manages. Users can override the value of the PROMETHEUS_ENDPOINT environment variable by modifying the corresponding deployment's environment variables
      prometheusEndpoint: ""

    # MPI Run configuration
    mpiRun:
      # -- Name of the secret containing the SSH key for MPI Run
      secretName: "mpi-run-ssh-secret"
      # SSH key generation configuration
      sshKeygen:
        # -- Whether to enable SSH key generation for MPI Run
        enabled: true

  # Webhook configuration for admission control and validation
  webhook:
    # Certificate configuration for webhook TLS
    certificateSecret:
      # -- Name of the Kubernetes secret containing webhook TLS certificates. The secret must contain three keys: tls.crt (server certificate), tls.key (server private key), and ca.crt (Certificate Authority certificate).
      name: webhook-server-cert

      # -- Whether to manage the certificate secret externally. When false (default), certificates are automatically generated via Helm hooks during installation. When true, you must create the secret manually before installing the chart.
      external: false

    # -- Certificate validity duration in days for auto-generated certificates. Only used when certManager.enabled=false and certificateSecret.external=false. After this duration, certificates will expire and need to be regenerated.
    certificateValidity: 365

    # Container image for certificate generation and CA injection jobs
    # Only used when certManager.enabled=false and certificateSecret.external=false
    certGenerator:
      image:
        # NOTE(review): the etcd image in this file was switched to the bitnamilegacy/ repository
        # because of the Bitnami catalog changes; confirm bitnami/kubectl is still published.
        # -- Container image repository for certificate generation jobs. This image must contain both openssl and kubectl commands.
        repository: bitnami/kubectl
        # NOTE(review): "latest" is a mutable tag; with pullPolicy IfNotPresent a node keeps using
        # whichever build it cached first. Consider pinning a specific version.
        # -- Container image tag for certificate generation jobs
        tag: latest
        # -- Image pull policy for certificate generation jobs
        pullPolicy: IfNotPresent

    # -- CA bundle (base64 encoded) for webhook validation. Only used when certificateSecret.external=true. For automatic certificate generation or cert-manager integration, leave this empty as it will be injected automatically.
    caBundle: ""

    # -- Webhook failure policy controls how Kubernetes handles requests when the webhook is unavailable. 'Fail' (recommended for production) rejects requests if the webhook cannot be reached, ensuring strict validation. 'Ignore' allows requests through if the webhook is unavailable, providing availability over validation guarantees.
    failurePolicy: Fail

    # -- Timeout in seconds for webhook validation calls. If the webhook doesn't respond within this time, the request will be handled according to the failurePolicy.
    timeoutSeconds: 10

    # Namespace selector for webhook scope control
    # -- Custom namespace selector for webhook validation. Use this to include or exclude specific namespaces from webhook validation. For CLUSTER-WIDE operators, you can exclude namespaces managed by namespace-restricted operators by using: matchExpressions: [{ key: "dynamo-operator", operator: "NotIn", values: ["namespace-restricted"] }]. For NAMESPACE-RESTRICTED operators, leave empty as it will be auto-configured to match only the operator's namespace.
    namespaceSelector: {}

    # cert-manager integration for automated certificate lifecycle management
    certManager:
      # -- Whether to use cert-manager for automatic certificate management. Requires cert-manager to be installed in the cluster. When enabled, cert-manager will automatically generate, renew, and rotate certificates, and the automatic certificate generation via Helm hooks will be disabled.
      enabled: false

      # Certificate configuration for cert-manager
      certificate:
        # -- Certificate duration for webhook certificates managed by cert-manager (e.g., "8760h" for 1 year). cert-manager will automatically renew the certificate before it expires.
        duration: "8760h"

        # -- Time before certificate expiration to trigger renewal (e.g., "360h" for 15 days). cert-manager will attempt to renew the certificate when this threshold is reached.
        renewBefore: "360h"

        # Root CA configuration for cert-manager
        rootCA:
          # -- Duration for the root CA certificate (e.g., "87600h" for 10 years). The root CA typically has a much longer lifetime than the leaf certificates it signs.
          duration: "87600h"

          # -- Time before root CA expiration to trigger renewal (e.g., "720h" for 30 days). Renewing a CA can be disruptive as all signed certificates must be reissued.
          renewBefore: "720h"

  # Checkpoint configuration for fast pod restore using CRIU/cuda-checkpoint
  # NOTE: The checkpoint infrastructure (PVC + DaemonSet) must be installed separately
  # using the chrek Helm chart in each namespace where checkpointing is needed.
  checkpoint:
    # -- Whether to enable checkpoint/restore functionality
    enabled: false

    # -- Path written by worker when model is loaded and ready for checkpointing
    readyForCheckpointFilePath: "/tmp/ready-for-checkpoint"

    # Storage configuration
    # These settings tell the operator where to find checkpoint storage
    # Must match the configuration in the chrek chart
    storage:
      # -- Storage backend type: pvc, s3, or oci
      type: pvc

      # PVC storage configuration (used when type=pvc)
      pvc:
        # -- Name of the PVC created by the chrek chart
        pvcName: "chrek-pvc"
        # -- Base path within the PVC for storing checkpoints
        basePath: "/checkpoints"

      # S3 storage configuration (used when type=s3)
      s3:
        # -- S3 URI in format: s3://[endpoint/]bucket/prefix
        uri: ""
        # -- Reference to a secret containing AWS credentials
        credentialsSecretRef: ""

      # OCI registry storage configuration (used when type=oci)
      oci:
        # -- OCI URI in format: oci://registry/repository
        uri: ""
        # -- Reference to a docker config secret for registry authentication
        credentialsSecretRef: ""

# Grove component - distributed inference orchestration
# Installation is controlled by global.grove.install above.
grove:
  # -- Node tolerations for Grove pods
  tolerations: []
  # -- Affinity for Grove pods
  affinity: {}

# Kai Scheduler component - advanced workload scheduling
# Installation is controlled by global.kai-scheduler.install above.
# Integration is controlled by global.kai-scheduler.enabled above.
kai-scheduler:
  # Global configuration for kai-scheduler (applies to all components including crd-upgrader)
  global:
    # -- Node tolerations for kai-scheduler pods
    tolerations: []
    # -- Affinity for kai-scheduler pods
    affinity: {}

# etcd configuration - distributed key-value store
# Installation is controlled by global.etcd.install above.
etcd:
  image:
    # -- following bitnami announcement for brownout - https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog, we need to use the legacy repository until we migrate to the new "secure" repository
    repository: bitnamilegacy/etcd
    tag: 3.5.18-debian-12-r5

  # Persistent storage configuration for etcd data
  persistence:
    # Whether to enable persistent storage (recommended for production)
    enabled: true
    # Use the cluster default storage-class or override with a named class
    storageClass: null
    # Size of persistent volume for etcd data
    size: 1Gi

  # Pre-upgrade job configuration
  preUpgradeJob:
    # Whether to run pre-upgrade validation jobs
    enabled: false

  # Number of etcd replicas (1 for single-node, 3+ for HA)
  replicaCount: 1

  # Authentication and authorization settings
  # Explicitly remove authentication for simplified internal communication
  auth:
    rbac:
      # Whether to create RBAC authentication (disabled for internal use)
      create: false

  # Health check configuration
  readinessProbe:
    # Whether to enable readiness probes (disabled to reduce startup complexity)
    enabled: false

  livenessProbe:
    # Whether to enable liveness probes (disabled to reduce startup complexity)
    enabled: false

  # Pod Disruption Budget configuration
  # Should be enabled for HA deployments with 3+ replicas
  pdb:
    # Whether to create a PodDisruptionBudget (disabled for single-node deployments)
    create: false

  # Node tolerations for etcd pods (allows scheduling on specific nodes)
  tolerations: []

  # Affinity for etcd pods
  affinity: {}

# NATS configuration - messaging system for operator communication
nats:
  # -- Whether to enable NATS deployment, disable if you want to use an external NATS instance. For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats , all nats settings should be prefixed with "nats."
  enabled: true

  # TLS Certificate Authority configuration for secure communication
  # Reference a common CA Certificate or Bundle in all nats config `tls` blocks and nats-box contexts
  # Note: `tls.verify` still must be set in the appropriate nats config `tls` blocks to require mTLS
  tlsCA:
    # Whether to enable TLS CA configuration
    enabled: false

  # Core NATS server configuration
  config:
    # NATS clustering for high availability (multiple NATS servers)
    cluster:
      # Whether to enable NATS clustering (disabled for single-node setups)
      enabled: false

    # JetStream - persistent messaging and streaming capabilities
    jetstream:
      # Whether to enable JetStream (recommended for persistent messaging)
      enabled: true

      # File-based storage for JetStream streams and consumers
      fileStore:
        # Whether to enable file storage (persistent across restarts)
        enabled: true
        # Directory path for JetStream file storage
        dir: /data

        ############################################################
        # Persistent Volume Claim for JetStream file storage
        ############################################################
        pvc:
          # Whether to create a PVC for JetStream storage
          enabled: true
          # Size of the persistent volume for JetStream data
          size: 10Gi
          # Storage class name (leave empty for default)
          storageClassName:

          # Advanced PVC configuration (merge additional fields)
          # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#persistentvolumeclaim-v1-core
          merge: {}
          patch: []
          # PVC name (defaults to "{{ include "nats.fullname" $ }}-js")
          name:

        # Maximum size for JetStream file storage (defaults to PVC size)
        maxSize:

      # Memory-based storage for JetStream (non-persistent)
      memoryStore:
        # Whether to enable memory storage (faster but not persistent)
        enabled: false

      # Advanced JetStream configuration
      # For options see: https://docs.nats.io/running-a-nats-service/configuration#jetstream
      merge: {}
      patch: []

    # Core NATS server settings
    nats:
      # Port for NATS client connections
      port: 4222

      # TLS configuration for encrypted connections
      tls:
        # Whether to enable TLS encryption
        enabled: false
        # Advanced TLS configuration
        # For options see: https://docs.nats.io/running-a-nats-service/configuration/securing_nats/tls
        merge: {}
        patch: []

    # Leaf nodes for creating NATS topologies and remote connections
    leafnodes:
      # Whether to enable leaf node connections
      enabled: false

    # WebSocket support for browser-based NATS clients
    websocket:
      # Whether to enable WebSocket protocol support
      enabled: false

    # MQTT protocol bridge for IoT device connectivity
    mqtt:
      # Whether to enable MQTT protocol support
      enabled: false

    # Gateway connections for multi-cluster NATS deployments
    gateway:
      # Whether to enable gateway connections
      enabled: false

    # HTTP monitoring endpoint for NATS server metrics
    monitor:
      # Whether to enable HTTP monitoring interface
      enabled: true
      # Port for monitoring HTTP endpoint
      port: 8222

      # TLS configuration for monitoring endpoint
      tls:
        # Whether to enable HTTPS for monitoring (requires config.nats.tls enabled)
        # When enabled, monitoring port will use HTTPS with the options from config.nats.tls
        enabled: false

    # Go pprof profiling endpoint for performance debugging
    profiling:
      # Whether to enable profiling endpoint (for debugging only)
      enabled: false
      # Port for profiling endpoint
      port: 65432

    # Account resolver for multi-tenant NATS deployments
    resolver:
      # Whether to enable account resolution (for advanced multi-tenancy)
      enabled: false

    # Server naming configuration
    # Adds a prefix to the server name, which defaults to the pod name
    # Helpful for ensuring server name is unique in a super cluster
    serverNamePrefix: ""

    # Advanced NATS configuration merging and patching
    # For complete options see: https://docs.nats.io/running-a-nats-service/configuration
    # Special rules apply:
    #  1. strings that start with << and end with >> will be unquoted
    #     use this for variables and numbers with units
    #  2. keys ending in $include will be switched to include directives
    #     keys are sorted alphabetically, use prefix before $includes to control includes ordering
    #     paths should be relative to /etc/nats-config/nats.conf
    # Example:
    #   merge:
    #     $include: ./my-config.conf
    #     zzz$include: ./my-config-last.conf
    #     server_name: nats
    #     authorization:
    #       token: << $TOKEN >>
    #     jetstream:
    #       max_memory_store: << 1GB >>
    merge:
      # 15 MiB (15 * 1024 * 1024 bytes) to accommodate prompt embeddings: 10MB decoded → ~13.3MB base64-encoded + metadata
      # Also allows larger context: 256K tokens (int32 - 4 bytes each) = 1MB
      max_payload: 15728640
    patch: []

  ############################################################
  # NATS container configuration in StatefulSet
  ############################################################
  container:
    # NATS server container image configuration
    image:
      # Official NATS server repository
      repository: nats
      # NATS server version (Alpine-based for smaller size)
      tag: 2.10.21-alpine
      # Image pull policy (leave empty for chart default)
      pullPolicy:
      # Custom registry URL (leave empty for Docker Hub)
      registry:

    # Container port configuration
    # Note: Ports must also be enabled in the config section above
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#containerport-v1-core
    ports:
      # Main NATS client connection port
      nats: {}
      # Leaf node connection port
      leafnodes: {}
      # WebSocket connection port
      websocket: {}
      # MQTT protocol port
      mqtt: {}
      # Cluster communication port
      cluster: {}
      # Gateway connection port
      gateway: {}
      # HTTP monitoring port
      monitor: {}
      # Go profiling port
      profiling: {}

    # Environment variables for the NATS container
    # Map with key as env var name, value can be string or map
    # Example:
    #   env:
    #     GOMEMLIMIT: 7GiB
    #     TOKEN:
    #       valueFrom:
    #         secretKeyRef:
    #           name: nats-auth
    #           key: token
    env: {}

    # Advanced container configuration merging and patching
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
  # Configuration reloader container for hot config updates
  ############################################################
  reloader:
    # Whether to enable the config reloader sidecar container
    enabled: true

    # Config reloader container image
    image:
      # Official NATS config reloader repository
      repository: natsio/nats-server-config-reloader
      # Config reloader version
      tag: 0.16.0
      # Image pull policy (leave empty for chart default)
      pullPolicy:
      # Custom registry URL (leave empty for Docker Hub)
      registry:

    # Environment variables for the reloader container
    env: {}

    # Volume mount prefixes from NATS container to share with reloader
    # All NATS container volume mounts with these prefixes will be mounted into the reloader
    natsVolumeMountPrefixes:
      - /etc/

    # Advanced reloader container configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
  # Prometheus metrics exporter container (optional)
  ############################################################
  # Note: config.monitor must be enabled for this to work
  promExporter:
    # Whether to enable Prometheus metrics exporter sidecar
    enabled: false

  ############################################################
  # Kubernetes Service for NATS access
  ############################################################
  service:
    # Whether to create a Kubernetes Service for NATS
    enabled: true

    # Service port configuration
    # Additional boolean field 'enabled' controls whether port is exposed in the service
    # Note: Ports must also be enabled in the config section above
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#serviceport-v1-core
    ports:
      # Main NATS client connection port
      nats:
        enabled: true
      # Leaf node connection port
      leafnodes:
        enabled: true
      # WebSocket connection port
      websocket:
        enabled: true
      # MQTT protocol port
      mqtt:
        enabled: true
      # Cluster communication port (typically internal only)
      cluster:
        enabled: false
      # Gateway connection port (typically internal only)
      gateway:
        enabled: false
      # HTTP monitoring port (typically internal only)
      monitor:
        enabled: false
      # Go profiling port (typically internal only)
      profiling:
        enabled: false

    # Advanced service configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []
    # Service name (defaults to "{{ include "nats.fullname" $ }}")
    name:

  ############################################################
  # Advanced NATS Kubernetes resource configuration
  ############################################################

  # StatefulSet configuration for NATS server persistence
  statefulSet:
    # Advanced StatefulSet configuration merging and patching
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#statefulset-v1-apps
    merge: {}
    patch: []
    # StatefulSet name (defaults to "{{ include "nats.fullname" $ }}")
    name:

  # Pod template configuration for NATS StatefulSet
  podTemplate:
    # Whether to add a hash of the ConfigMap as a pod annotation
    # This will cause the StatefulSet to roll when the ConfigMap is updated
    configChecksumAnnotation: true

    # Pod topology spread constraints for better distribution across nodes
    # Map of topologyKey: topologySpreadConstraint
    # labelSelector will be added automatically to match StatefulSet pods
    # Example:
    #   topologySpreadConstraints:
    #     kubernetes.io/hostname:
    #       maxSkew: 1
    topologySpreadConstraints: {}

    # Advanced pod template configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#pod-v1-core
    merge:
      spec:
        # Node tolerations for NATS pods (allows scheduling on specific nodes)
        tolerations: []
        # Affinity for NATS pods
        affinity: {}
    patch: []

  # Headless service for StatefulSet pod discovery
  headlessService:
    # Advanced headless service configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []
    # Headless service name (defaults to "{{ include "nats.fullname" $ }}-headless")
    name:

  # ConfigMap for NATS server configuration
  configMap:
    # Advanced ConfigMap configuration
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#configmap-v1-core
    merge: {}
    patch: []
    # ConfigMap name (defaults to "{{ include "nats.fullname" $ }}-config")
    name:

  # Pod Disruption Budget for controlled rolling updates
  podDisruptionBudget:
    # Whether to create a PodDisruptionBudget (recommended for production)
    enabled: true

  # Service Account for NATS server pods
  serviceAccount:
    # Whether to create and use a dedicated service account
    enabled: false

  ############################################################
  # NATS Box - CLI tools and debugging container
  # NATS Box provides CLI tools for interacting with NATS server
  ############################################################
  natsBox:
    # Whether to deploy NATS Box for CLI access and debugging
    enabled: false

    ############################################################
    # NATS client contexts for authentication and connection
    ############################################################
    contexts:
      # Default context configuration
      default:
        # Credentials-based authentication
        creds:
          # Inline credentials file contents (base64 encoded)
          contents:
          # Name of existing secret containing credentials file
          secretName:
          # Directory to mount credentials (defaults to /etc/nats-creds/<context-name>)
          dir:
          # Key name in secret for credentials file
          key: nats.creds

        # NKey-based authentication (public/private key pairs)
        nkey:
          # Inline NKey file contents (base64 encoded)
          contents:
          # Name of existing secret containing NKey file
          secretName:
          # Directory to mount NKey (defaults to /etc/nats-nkeys/<context-name>)
          dir:
          # Key name in secret for NKey file
          key: nats.nk

        # TLS client certificate authentication
        tls:
          # Name of existing secret containing TLS client certificates
          secretName:
          # Directory to mount certificates (defaults to /etc/nats-certs/<context-name>)
          dir:
          # Certificate file name in secret
          cert: tls.crt
          # Private key file name in secret
          key: tls.key

        # Advanced context configuration
        # For options see: https://docs.nats.io/using-nats/nats-tools/nats_cli#nats-contexts
        merge: {}
        patch: []

    # Name of context to select by default for NATS CLI operations
    defaultContextName: default

    ############################################################
    # NATS Box container configuration
    ############################################################
    container:
      # NATS Box container image
      image:
        # Official NATS Box repository with CLI tools
        repository: natsio/nats-box
        # NATS Box version
        tag: 0.14.5
        # Image pull policy (leave empty for chart default)
        pullPolicy:
        # Custom registry URL (leave empty for Docker Hub)
        registry:

      # Environment variables for NATS Box container
      env: {}

      # Advanced container configuration
      # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
      merge: {}
      patch: []

    # Service Account for NATS Box deployment
    serviceAccount:
      # Whether to create and use a dedicated service account for NATS Box
      enabled: false

    # Pod template configuration for NATS Box deployment
    podTemplate:
      merge:
        spec:
          # Node tolerations for NATS Box pods
          tolerations: []
          # Affinity for NATS Box pods
          affinity: {}
      patch: []