values.yaml 31.4 KB
Newer Older
1
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Used to generate top-level secrets (overridden by custom-values.yaml)
16

17
18
19
20
21
22
23
global:
  etcd:
    # -- Toggle installation of the etcd subchart bundled with this chart.
    # true:  etcd is deployed and the operator is auto-configured with its address.
    # false: etcd is not deployed; if you bring your own etcd, set
    # dynamo-operator.etcdAddr to point the operator at that external instance.
    install: false

24
25
26
# Subcharts configuration

# Dynamo operator configuration
27
dynamo-operator:
28
  # -- Whether to enable the Dynamo Kubernetes operator deployment
29
  enabled: true
30
31

  # -- NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port"
32
  natsAddr: ""
33

34
  # -- etcd server address for an external etcd instance. Only needed when using external etcd without the bundled subchart. Format: "http://hostname:port" or "https://hostname:port"
35
  etcdAddr: ""
36

Biswa Panda's avatar
Biswa Panda committed
37
38
39
40
  nats:
    # -- Whether NATS is enabled
    enabled: true

41
42
43
  # -- URL for the Model Express server if not deployed by this helm chart. This is ignored if Model Express server is installed by this helm chart (global.model-express.enabled is true).
  modelExpressURL: ""
  # -- Namespace access controls for the operator
44
  namespaceRestriction:
45
46
    # -- Whether to restrict operator to specific namespaces. By default, the operator will run with cluster-wide permissions. Only 1 instance of the operator should be deployed in the cluster. If you want to deploy multiple operator instances, you can set this to true and specify the target namespace (by default, the target namespace is the helm release namespace).
    enabled: false
47
    # -- Target namespace for operator deployment (leave empty for current namespace)
48
    targetNamespace:
49
50
51
52
53
54
55
    # Namespace scope marker lease (namespace-restricted mode only). The lease
    # prevents conflicts when cluster-wide and namespace-restricted operators
    # run side by side.
    lease:
      # How long the marker lease stays valid without being renewed.
      # A namespace-restricted operator creates the lease in its own namespace;
      # the cluster-wide operator detects it and excludes that namespace from
      # processing. If renewals stop (e.g., the namespace operator crashes), the
      # lease expires and the cluster-wide operator automatically resumes
      # processing that namespace.
      duration: 30s
      # How often the namespace-restricted operator renews the marker lease to
      # signal that it is still running.
      renewInterval: 10s

56
57
58
59
60
61
62
  # -- GPU discovery configuration (only applies when namespaceRestriction.enabled=true)
  gpuDiscovery:
    # -- Whether to provision a ClusterRole for the namespace-scoped operator to read GPU node labels.
    # When true (default), Helm creates a ClusterRole/ClusterRoleBinding granting node read access.
    # Set to false if your installer lacks ClusterRole creation permissions.
    enabled: true

63
64
  # -- The Dynamo discovery backend to use. Default is "kubernetes" for Kubernetes API service discovery. Set to "etcd" to use etcd for discovery.
  discoveryBackend: "kubernetes"
65
66

  # Controller manager configuration
67
  controllerManager:
68
    # -- Node tolerations for controller manager pods
69
    tolerations: []
70

71
    # -- Affinity for controller manager pods
72
    affinity: {}
73

74
75
76
77
78
79
80
    # Leader election settings used for cluster-wide coordination
    leaderElection:
      # -- ID of the leader election lock. When empty, the default
      # (dynamo.nvidia.com, shared across all cluster-wide operators) is used.
      # WARNING: every cluster-wide operator must use the SAME ID; differing IDs
      # would allow multiple simultaneous leaders (split-brain).
      id: ""
      # -- Namespace holding the leader election leases (cluster-wide mode only).
      # When empty, kube-system is used. All cluster-wide operators should use
      # the SAME namespace for leader election to work properly.
      namespace: ""

81
    manager:
82
      # Container image configuration for the operator manager
83
      image:
84
        # -- Official NVIDIA Dynamo operator image repository
85
        repository: "nvcr.io/nvidia/ai-dynamo/kubernetes-operator"
86
        # -- Image tag (leave empty to use chart default)
87
        tag: ""
88
        # -- Image pull policy - when to pull the image
89
        pullPolicy: IfNotPresent
90
91

      # Command line arguments for the operator manager
92
      args:
93
        # -- Health probe endpoint for Kubernetes health checks
94
        - --health-probe-bind-address=:8081
95
        # -- Metrics endpoint for Prometheus scraping (localhost only for security)
96
        - --metrics-bind-address=127.0.0.1:8080
97
98

  # -- Secrets for pulling private container images
99
  imagePullSecrets: []
100
101

  # Core Dynamo platform configuration
102
  dynamo:
103
    # -- How long to wait before forcefully terminating Grove instances
104
    groveTerminationDelay: 4h
105
106

    # Docker registry configuration for private repositories
107
    dockerRegistry:
108
      # -- Whether to use Kubernetes secrets for registry authentication
109
      useKubernetesSecret: false
110
      # -- Docker registry server URL
111
      server:
112
      # -- Registry username
113
      username:
114
      # -- Registry password (consider using existingSecretName instead)
115
      password:
116
      # -- Name of existing Kubernetes secret containing registry credentials
117
      existingSecretName:
118
      # -- Whether the registry uses HTTPS
119
      secure: true
120
121

    # Ingress configuration for external access
122
    ingress:
123
      # -- Whether to create ingress resources
124
      enabled: false
125
      # -- Ingress class name (e.g., "nginx", "traefik")
126
      className:
127
      # -- Secret name containing TLS certificates
128
      tlsSecretName: my-tls-secret
129
130

    # Istio service mesh configuration
131
    istio:
132
      # -- Whether to enable Istio integration
133
      enabled: false
134
      # -- Istio gateway name for routing
135
      gateway:
136
137

    # -- Host suffix for generated ingress hostnames
138
    ingressHostSuffix: ""
139
140

    # -- Whether VirtualServices should support HTTPS routing
141
    virtualServiceSupportsHTTPS: false
142

143
144
145
146
147
    # Metrics configuration
    metrics:
      # -- Endpoint that services can use to retrieve metrics. If set, dynamo operator will automatically inject the PROMETHEUS_ENDPOINT environment variable into services it manages. Users can override the value of the PROMETHEUS_ENDPOINT environment variable by modifying the corresponding deployment's environment variables
      prometheusEndpoint: ""

148
149
150
151
152
153
154
155
156
    # MPI Run configuration
    mpiRun:
      # -- Name of the secret containing the SSH key for MPI Run
      secretName: "mpi-run-ssh-secret"
      # SSH key generation configuration
      sshKeygen:
        # -- Whether to enable SSH key generation for MPI Run
        enabled: true

157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
  # Webhook configuration for admission control and validation
  webhook:
    # -- Whether to enable admission webhooks for resource validation. When enabled, the operator will validate DynamoComponentDeployment and DynamoGraphDeployment resources before they are created or updated in the cluster. Enabled by default for production-ready validation and better error reporting.
    enabled: true

    # Certificate configuration for webhook TLS
    certificateSecret:
      # -- Name of the Kubernetes secret containing webhook TLS certificates. The secret must contain three keys: tls.crt (server certificate), tls.key (server private key), and ca.crt (Certificate Authority certificate).
      name: webhook-server-cert

      # -- Whether to manage the certificate secret externally. When false (default), certificates are automatically generated via Helm hooks during installation. When true, you must create the secret manually before installing the chart.
      external: false

    # -- Certificate validity duration in days for auto-generated certificates. Only used when certManager.enabled=false and certificateSecret.external=false. After this duration, certificates will expire and need to be regenerated.
    certificateValidity: 365

    # Container image for certificate generation and CA injection jobs
    # Only used when certManager.enabled=false and certificateSecret.external=false
    certGenerator:
      image:
        # NOTE(review): the etcd image in this file uses the bitnamilegacy/
        # namespace because of the Bitnami catalog change; confirm that
        # bitnami/kubectl is still published under this repository name.
        # -- Container image repository for certificate generation jobs. This image must contain both openssl and kubectl commands.
        repository: bitnami/kubectl
        # NOTE(review): `latest` is a floating tag, and with pullPolicy
        # IfNotPresent a node presumably keeps using whatever copy it has
        # cached. Consider pinning a specific version for reproducible installs.
        # -- Container image tag for certificate generation jobs
        tag: latest
        # -- Image pull policy for certificate generation jobs
        pullPolicy: IfNotPresent

    # -- CA bundle (base64 encoded) for webhook validation. Only used when certificateSecret.external=true. For automatic certificate generation or cert-manager integration, leave this empty as it will be injected automatically.
    caBundle: ""

    # -- Webhook failure policy controls how Kubernetes handles requests when the webhook is unavailable. 'Fail' (recommended for production) rejects requests if the webhook cannot be reached, ensuring strict validation. 'Ignore' allows requests through if the webhook is unavailable, providing availability over validation guarantees.
    failurePolicy: Fail

    # -- Timeout in seconds for webhook validation calls. If the webhook doesn't respond within this time, the request will be handled according to the failurePolicy.
    timeoutSeconds: 10

    # Namespace selector for webhook scope control
    # -- Custom namespace selector for webhook validation. Use this to include or exclude specific namespaces from webhook validation. For CLUSTER-WIDE operators, you can exclude namespaces managed by namespace-restricted operators by using: matchExpressions: [{ key: "dynamo-operator", operator: "NotIn", values: ["namespace-restricted"] }]. For NAMESPACE-RESTRICTED operators, leave empty as it will be auto-configured to match only the operator's namespace.
    namespaceSelector: {}

    # cert-manager integration for automated certificate lifecycle management
    certManager:
      # -- Whether to use cert-manager for automatic certificate management. Requires cert-manager to be installed in the cluster. When enabled, cert-manager will automatically generate, renew, and rotate certificates, and the automatic certificate generation via Helm hooks will be disabled.
      enabled: false

      # Certificate configuration for cert-manager
      certificate:
        # -- Certificate duration for webhook certificates managed by cert-manager (e.g., "8760h" for 1 year). cert-manager will automatically renew the certificate before it expires.
        duration: "8760h"

        # -- Time before certificate expiration to trigger renewal (e.g., "360h" for 15 days). cert-manager will attempt to renew the certificate when this threshold is reached.
        renewBefore: "360h"

        # Root CA configuration for cert-manager
        rootCA:
          # -- Duration for the root CA certificate (e.g., "87600h" for 10 years). The root CA typically has a much longer lifetime than the leaf certificates it signs.
          duration: "87600h"

          # -- Time before root CA expiration to trigger renewal (e.g., "720h" for 30 days). Renewing a CA can be disruptive as all signed certificates must be reissued.
          renewBefore: "720h"

218
219
220
221
222
  # Checkpoint configuration for fast pod restore using CRIU/cuda-checkpoint
  # NOTE: The checkpoint infrastructure (PVC + DaemonSet) must be installed separately
  # using the chrek Helm chart in each namespace where checkpointing is needed.
  checkpoint:
    # -- Whether to enable checkpoint/restore functionality
223
    enabled: false
224

225
226
227
228
229
230
231
232
233
    # NOTE(review): `latest` is a floating tag; consider pinning a busybox
    # version so checkpoint jobs behave the same across nodes and over time.
    # -- Image used for init containers in checkpoint jobs (e.g., signal file cleanup)
    initContainerImage: "busybox:latest"

    # -- Path written by worker when model is loaded and ready for checkpointing
    readyForCheckpointFilePath: "/tmp/ready-for-checkpoint"

    # -- Path written by restore-entrypoint after successful CRIU restore
    restoreMarkerFilePath: "/tmp/dynamo-restored"

234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
    # Storage configuration
    # These settings tell the operator where to find checkpoint storage
    # Must match the configuration in the chrek chart
    storage:
      # -- Storage backend type: pvc, s3, or oci
      type: pvc

      # -- Host path for signal files (communication between checkpoint pod and DaemonSet)
      signalHostPath: "/var/lib/chrek/signals"

      # PVC storage configuration (used when type=pvc)
      pvc:
        # -- Name of the PVC created by the chrek chart
        pvcName: "chrek-pvc"
        # -- Base path within the PVC for storing checkpoints
        basePath: "/checkpoints"

      # S3 storage configuration (used when type=s3)
      s3:
        # -- S3 URI in format: s3://[endpoint/]bucket/prefix
        uri: ""
        # -- Reference to a secret containing AWS credentials
        credentialsSecretRef: ""

      # OCI registry storage configuration (used when type=oci)
      oci:
        # -- OCI URI in format: oci://registry/repository
        uri: ""
        # -- Reference to a docker config secret for registry authentication
        credentialsSecretRef: ""

265
266
267
268
# Grove component - distributed inference orchestration
grove:
  # -- Whether to enable Grove for multi-node inference coordination. If enabled, the Grove operator will be deployed cluster-wide.
  enabled: false
269
270
271
272
  # -- Node tolerations for Grove pods
  tolerations: []
  # -- Affinity for Grove pods
  affinity: {}
273
274
275
276
277

# Kai Scheduler component - advanced workload scheduling
kai-scheduler:
  # -- Whether to enable Kai Scheduler for intelligent resource allocation. If enabled, the Kai Scheduler operator will be deployed cluster-wide.
  enabled: false
278
279
280
281
282
283
  # Global configuration for kai-scheduler (applies to all components including crd-upgrader)
  global:
    # -- Node tolerations for kai-scheduler pods
    tolerations: []
    # -- Affinity for kai-scheduler pods
    affinity: {}
284

285
286
# etcd configuration - distributed key-value store
# Installation is controlled by global.etcd.install above.
287
etcd:
288
  image:
289
    # -- Following the Bitnami brownout announcement (https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog), we need to use the legacy repository until we migrate to the new "secure" repository.
290
    repository: bitnamilegacy/etcd
291
    tag: 3.5.18-debian-12-r5
292

293
  # Persistent storage configuration for etcd data
294
  persistence:
295
    # Whether to enable persistent storage (recommended for production)
296
297
298
    enabled: true
    # Use the cluster default storage-class or override with a named class
    storageClass: null
299
    # Size of persistent volume for etcd data
300
    size: 1Gi
301
302

  # Pre-upgrade job configuration
303
  preUpgradeJob:
304
    # Whether to run pre-upgrade validation jobs
305
    enabled: false
306
307

  # Number of etcd replicas (1 for single-node, 3+ for HA)
308
  replicaCount: 1
309
310
311

  # Authentication and authorization settings
  # Explicitly remove authentication for simplified internal communication
312
313
  auth:
    rbac:
314
      # Whether to create RBAC authentication (disabled for internal use)
315
316
      create: false

317
  # Health check configuration
318
  readinessProbe:
319
    # Whether to enable readiness probes (disabled to reduce startup complexity)
320
321
322
    enabled: false

  livenessProbe:
323
    # Whether to enable liveness probes (disabled to reduce startup complexity)
324
325
    enabled: false

326
327
328
329
330
331
  # Pod Disruption Budget configuration
  # Should be enabled for HA deployments with 3+ replicas
  pdb:
    # Whether to create a PodDisruptionBudget (disabled for single-node deployments)
    create: false

332
  # Node tolerations for etcd pods (allows scheduling on specific nodes)
333
334
  tolerations: []

335
336
337
  # Affinity for etcd pods
  affinity: {}

338
# NATS configuration - messaging system for operator communication
339
nats:
340
  # -- Whether to enable the NATS deployment. Disable it if you want to use an external NATS instance. For complete configuration options, see https://github.com/nats-io/k8s/tree/main/helm/charts/nats — all NATS settings should be prefixed with "nats."
341
  enabled: true
342
343
344
345

  # TLS Certificate Authority configuration for secure communication
  # Reference a common CA Certificate or Bundle in all nats config `tls` blocks and nats-box contexts
  # Note: `tls.verify` still must be set in the appropriate nats config `tls` blocks to require mTLS
346
  tlsCA:
347
    # Whether to enable TLS CA configuration
348
349
    enabled: false

350
  # Core NATS server configuration
351
  config:
352
    # NATS clustering for high availability (multiple NATS servers)
353
    cluster:
354
      # Whether to enable NATS clustering (disabled for single-node setups)
355
356
      enabled: false

357
    # JetStream - persistent messaging and streaming capabilities
358
    jetstream:
359
      # Whether to enable JetStream (recommended for persistent messaging)
360
361
      enabled: true

362
      # File-based storage for JetStream streams and consumers
363
      fileStore:
364
        # Whether to enable file storage (persistent across restarts)
365
        enabled: true
366
        # Directory path for JetStream file storage
367
368
369
        dir: /data

        ############################################################
370
        # Persistent Volume Claim for JetStream file storage
371
372
        ############################################################
        pvc:
373
          # Whether to create a PVC for JetStream storage
374
          enabled: true
375
          # Size of the persistent volume for JetStream data
376
          size: 10Gi
377
          # Storage class name (leave empty for default)
378
379
          storageClassName:

380
          # Advanced PVC configuration (merge additional fields)
381
382
383
          # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#persistentvolumeclaim-v1-core
          merge: {}
          patch: []
384
          # PVC name (defaults to "{{ include "nats.fullname" $ }}-js")
385
386
          name:

387
        # Maximum size for JetStream file storage (defaults to PVC size)
388
389
        maxSize:

390
      # Memory-based storage for JetStream (non-persistent)
391
      memoryStore:
392
        # Whether to enable memory storage (faster but not persistent)
393
394
        enabled: false

395
396
      # Advanced JetStream configuration
      # For options see: https://docs.nats.io/running-a-nats-service/configuration#jetstream
397
398
399
      merge: {}
      patch: []

400
    # Core NATS server settings
401
    nats:
402
      # Port for NATS client connections
403
      port: 4222
404
405

      # TLS configuration for encrypted connections
406
      tls:
407
        # Whether to enable TLS encryption
408
        enabled: false
409
410
        # Advanced TLS configuration
        # For options see: https://docs.nats.io/running-a-nats-service/configuration/securing_nats/tls
411
412
413
        merge: {}
        patch: []

414
    # Leaf nodes for creating NATS topologies and remote connections
415
    leafnodes:
416
      # Whether to enable leaf node connections
417
418
      enabled: false

419
    # WebSocket support for browser-based NATS clients
420
    websocket:
421
      # Whether to enable WebSocket protocol support
422
423
      enabled: false

424
    # MQTT protocol bridge for IoT device connectivity
425
    mqtt:
426
      # Whether to enable MQTT protocol support
427
428
      enabled: false

429
    # Gateway connections for multi-cluster NATS deployments
430
    gateway:
431
      # Whether to enable gateway connections
432
433
      enabled: false

434
    # HTTP monitoring endpoint for NATS server metrics
435
    monitor:
436
      # Whether to enable HTTP monitoring interface
437
      enabled: true
438
      # Port for monitoring HTTP endpoint
439
      port: 8222
440
441

      # TLS configuration for monitoring endpoint
442
      tls:
443
444
        # Whether to enable HTTPS for monitoring (requires config.nats.tls enabled)
        # When enabled, monitoring port will use HTTPS with the options from config.nats.tls
445
446
        enabled: false

447
    # Go pprof profiling endpoint for performance debugging
448
    profiling:
449
      # Whether to enable profiling endpoint (for debugging only)
450
      enabled: false
451
      # Port for profiling endpoint
452
453
      port: 65432

454
    # Account resolver for multi-tenant NATS deployments
455
    resolver:
456
      # Whether to enable account resolution (for advanced multi-tenancy)
457
458
      enabled: false

459
460
461
    # Server naming configuration
    # Adds a prefix to the server name, which defaults to the pod name
    # Helpful for ensuring server name is unique in a super cluster
462
463
    serverNamePrefix: ""

464
465
466
    # Advanced NATS configuration merging and patching
    # For complete options see: https://docs.nats.io/running-a-nats-service/configuration
    # Special rules apply:
467
468
469
470
471
    #  1. strings that start with << and end with >> will be unquoted
    #     use this for variables and numbers with units
    #  2. keys ending in $include will be switched to include directives
    #     keys are sorted alphabetically, use prefix before $includes to control includes ordering
    #     paths should be relative to /etc/nats-config/nats.conf
472
    # Example:
473
474
475
476
477
478
479
480
    #   merge:
    #     $include: ./my-config.conf
    #     zzz$include: ./my-config-last.conf
    #     server_name: nats
    #     authorization:
    #       token: << $TOKEN >>
    #     jetstream:
    #       max_memory_store: << 1GB >>
481
    merge:
482
483
484
      # 15 MiB (15 * 1024 * 1024 bytes) to accommodate prompt embeddings: 10MB decoded → ~13.3MB base64-encoded + metadata
      # Also allows larger context: 256K tokens (int32, 4 bytes each) = 1MB
      max_payload: 15728640
485
486
487
    patch: []

  ############################################################
488
  # NATS container configuration in StatefulSet
489
490
  ############################################################
  container:
491
    # NATS server container image configuration
492
    image:
493
      # Official NATS server repository
494
      repository: nats
495
      # NATS server version (Alpine-based for smaller size)
496
      tag: 2.10.21-alpine
497
      # Image pull policy (leave empty for chart default)
498
      pullPolicy:
499
      # Custom registry URL (leave empty for Docker Hub)
500
501
      registry:

502
503
    # Container port configuration
    # Note: Ports must also be enabled in the config section above
504
505
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#containerport-v1-core
    ports:
506
      # Main NATS client connection port
507
      nats: {}
508
      # Leaf node connection port
509
      leafnodes: {}
510
      # WebSocket connection port
511
      websocket: {}
512
      # MQTT protocol port
513
      mqtt: {}
514
      # Cluster communication port
515
      cluster: {}
516
      # Gateway connection port
517
      gateway: {}
518
      # HTTP monitoring port
519
      monitor: {}
520
      # Go profiling port
521
522
      profiling: {}

523
524
525
    # Environment variables for the NATS container
    # Map with key as env var name, value can be string or map
    # Example:
526
527
528
529
530
531
532
533
534
    #   env:
    #     GOMEMLIMIT: 7GiB
    #     TOKEN:
    #       valueFrom:
    #         secretKeyRef:
    #           name: nats-auth
    #           key: token
    env: {}

535
    # Advanced container configuration merging and patching
536
537
538
539
540
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
541
  # Configuration reloader container for hot config updates
542
543
  ############################################################
  reloader:
544
    # Whether to enable the config reloader sidecar container
545
    enabled: true
546
547

    # Config reloader container image
548
    image:
549
      # Official NATS config reloader repository
550
      repository: natsio/nats-server-config-reloader
551
      # Config reloader version
552
      tag: 0.16.0
553
      # Image pull policy (leave empty for chart default)
554
      pullPolicy:
555
      # Custom registry URL (leave empty for Docker Hub)
556
557
      registry:

558
    # Environment variables for the reloader container
559
560
    env: {}

561
562
    # Volume mount prefixes from NATS container to share with reloader
    # All NATS container volume mounts with these prefixes will be mounted into the reloader
563
564
565
    # Path prefixes selecting which NATS container volume mounts are also
    # mounted into the reloader container.
    natsVolumeMountPrefixes:
      - /etc/

566
    # Advanced reloader container configuration
567
568
569
570
571
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
    merge: {}
    patch: []

  ############################################################
572
  # Prometheus metrics exporter container (optional)
573
  ############################################################
574
  # Note: config.monitor must be enabled for this to work
575
  promExporter:
576
    # Whether to enable Prometheus metrics exporter sidecar
577
578
579
    enabled: false

  ############################################################
580
  # Kubernetes Service for NATS access
581
582
  ############################################################
  service:
583
    # Whether to create a Kubernetes Service for NATS
584
585
    enabled: true

586
587
588
    # Service port configuration
    # Additional boolean field 'enabled' controls whether port is exposed in the service
    # Note: Ports must also be enabled in the config section above
589
590
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#serviceport-v1-core
    ports:
591
      # Main NATS client connection port
592
593
      nats:
        enabled: true
594
      # Leaf node connection port
595
596
      leafnodes:
        enabled: true
597
      # WebSocket connection port
598
599
      websocket:
        enabled: true
600
      # MQTT protocol port
601
602
      mqtt:
        enabled: true
603
      # Cluster communication port (typically internal only)
604
605
      cluster:
        enabled: false
606
      # Gateway connection port (typically internal only)
607
608
      gateway:
        enabled: false
609
      # HTTP monitoring port (typically internal only)
610
611
      monitor:
        enabled: false
612
      # Go profiling port (typically internal only)
613
614
615
      profiling:
        enabled: false

616
    # Advanced service configuration
617
618
619
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []
620
    # Service name (defaults to "{{ include "nats.fullname" $ }}")
621
622
623
    name:

  ############################################################
624
  # Advanced NATS Kubernetes resource configuration
625
626
  ############################################################

627
  # StatefulSet configuration for NATS server persistence
628
  statefulSet:
629
    # Advanced StatefulSet configuration merging and patching
630
631
632
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#statefulset-v1-apps
    merge: {}
    patch: []
633
    # StatefulSet name (defaults to "{{ include "nats.fullname" $ }}")
634
635
    name:

636
  # Pod template configuration for NATS StatefulSet
637
  podTemplate:
638
639
    # Whether to add a hash of the ConfigMap as a pod annotation
    # This will cause the StatefulSet to roll when the ConfigMap is updated
640
641
    configChecksumAnnotation: true

642
643
644
645
646
647
648
    # Pod topology spread constraints for better distribution across nodes
    # Map of topologyKey: topologySpreadConstraint
    # labelSelector will be added automatically to match StatefulSet pods
    # Example:
    #   topologySpreadConstraints:
    #     kubernetes.io/hostname:
    #       maxSkew: 1
649
650
    topologySpreadConstraints: {}

651
    # Advanced pod template configuration
652
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#pod-v1-core
653
654
    merge:
      spec:
655
        # Node tolerations for NATS pods (allows scheduling on specific nodes)
656
        tolerations: []
657
658
        # Affinity for NATS pods
        affinity: {}
659
660
    patch: []

661
  # Headless service for StatefulSet pod discovery
662
  headlessService:
663
    # Advanced headless service configuration
664
665
666
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#service-v1-core
    merge: {}
    patch: []
667
    # Headless service name (defaults to "{{ include "nats.fullname" $ }}-headless")
668
669
    name:

670
  # ConfigMap for NATS server configuration
671
  configMap:
672
    # Advanced ConfigMap configuration
673
674
675
    # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#configmap-v1-core
    merge: {}
    patch: []
676
    # ConfigMap name (defaults to "{{ include "nats.fullname" $ }}-config")
677
678
    name:

679
  # Pod Disruption Budget for controlled rolling updates
680
  podDisruptionBudget:
681
    # Whether to create a PodDisruptionBudget (recommended for production)
682
683
    enabled: true

684
  # Service Account for NATS server pods
685
  serviceAccount:
686
    # Whether to create and use a dedicated service account
687
688
689
    enabled: false

  ############################################################
690
691
  # NATS Box - CLI tools and debugging container
  # NATS Box provides CLI tools for interacting with NATS server
692
693
  ############################################################
  natsBox:
694
    # Whether to deploy NATS Box for CLI access and debugging
695
    enabled: false
696
697

    ############################################################
698
    # NATS client contexts for authentication and connection
699
700
    ############################################################
    contexts:
701
      # Default context configuration
702
      default:
703
        # Credentials-based authentication
704
        creds:
705
          # Inline credentials file contents (base64 encoded)
706
          contents:
707
          # Name of existing secret containing credentials file
708
          secretName:
709
          # Directory to mount credentials (defaults to /etc/nats-creds/<context-name>)
710
          dir:
711
          # Key name in secret for credentials file
712
          key: nats.creds
713
714

        # NKey-based authentication (public/private key pairs)
715
        nkey:
716
          # Inline NKey file contents (base64 encoded)
717
          contents:
718
          # Name of existing secret containing NKey file
719
          secretName:
720
          # Directory to mount NKey (defaults to /etc/nats-nkeys/<context-name>)
721
          dir:
722
          # Key name in secret for NKey file
723
          key: nats.nk
724
725

        # TLS client certificate authentication
726
        tls:
727
          # Name of existing secret containing TLS client certificates
728
          secretName:
729
          # Directory to mount certificates (defaults to /etc/nats-certs/<context-name>)
730
          dir:
731
          # Certificate file name in secret
732
          cert: tls.crt
733
          # Private key file name in secret
734
735
          key: tls.key

736
737
        # Advanced context configuration
        # For options see: https://docs.nats.io/using-nats/nats-tools/nats_cli#nats-contexts
738
739
740
        merge: {}
        patch: []

741
    # Name of context to select by default for NATS CLI operations
742
743
744
    defaultContextName: default

    ############################################################
745
    # NATS Box container configuration
746
747
    ############################################################
    container:
748
      # NATS Box container image
749
      image:
750
        # Official NATS Box repository with CLI tools
751
        repository: natsio/nats-box
752
        # NATS Box version
753
        tag: 0.14.5
754
        # Image pull policy (leave empty for chart default)
755
        pullPolicy:
756
        # Custom registry URL (leave empty for Docker Hub)
757
758
        registry:

759
      # Environment variables for NATS Box container
760
761
      env: {}

762
      # Advanced container configuration
763
764
765
      # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#container-v1-core
      merge: {}
      patch: []
766
767

    # Service Account for NATS Box deployment
768
    serviceAccount:
769
      # Whether to create and use a dedicated service account for NATS Box
770
      enabled: false
771

772
    # Pod template configuration for NATS Box deployment
773
774
775
    podTemplate:
      merge:
        spec:
776
          # Node tolerations for NATS Box pods
777
          tolerations: []
778
779
          # Affinity for NATS Box pods
          affinity: {}
780
      patch: []