@@ -149,41 +149,25 @@ The chart includes built-in validation to prevent all operator conflicts:
...
@@ -149,41 +149,25 @@ The chart includes built-in validation to prevent all operator conflicts:
| dynamo-operator.webhook.certManager.certificate.renewBefore | string | `"360h"` | Time before certificate expiration to trigger renewal (e.g., "360h" for 15 days). cert-manager will attempt to renew the certificate when this threshold is reached. |
| dynamo-operator.webhook.certManager.certificate.renewBefore | string | `"360h"` | Time before certificate expiration to trigger renewal (e.g., "360h" for 15 days). cert-manager will attempt to renew the certificate when this threshold is reached. |
| dynamo-operator.webhook.certManager.certificate.rootCA.duration | string | `"87600h"` | Duration for the root CA certificate (e.g., "87600h" for 10 years). The root CA typically has a much longer lifetime than the leaf certificates it signs. |
| dynamo-operator.webhook.certManager.certificate.rootCA.duration | string | `"87600h"` | Duration for the root CA certificate (e.g., "87600h" for 10 years). The root CA typically has a much longer lifetime than the leaf certificates it signs. |
| dynamo-operator.webhook.certManager.certificate.rootCA.renewBefore | string | `"720h"` | Time before root CA expiration to trigger renewal (e.g., "720h" for 30 days). Renewing a CA can be disruptive as all signed certificates must be reissued. |
| dynamo-operator.webhook.certManager.certificate.rootCA.renewBefore | string | `"720h"` | Time before root CA expiration to trigger renewal (e.g., "720h" for 30 days). Renewing a CA can be disruptive as all signed certificates must be reissued. |
| dynamo-operator.checkpoint.enabled | bool | `false` | Whether to enable checkpoint/restore functionality. When enabled, deploys the checkpoint-agent DaemonSet for creating container checkpoints. |
| dynamo-operator.checkpoint.storage.signalHostPath | string | `"/var/lib/chrek/signals"` | Host path for signal files used for communication between checkpoint job pods and the DaemonSet. Both components mount this path to coordinate checkpoint completion. |
| dynamo-operator.checkpoint.storage.signalHostPath | string | `"/var/lib/chrek/signals"` | Host path for signal files (communication between checkpoint pod and DaemonSet) |
| dynamo-operator.checkpoint.storage.pvc.pvcName | string | `"checkpoint-storage"` | Name of an existing PVC for storing checkpoint tar files. This PVC must be created separately with RWX (ReadWriteMany) access mode to allow multiple nodes to read checkpoints. |
| dynamo-operator.checkpoint.storage.pvc.pvcName | string | `"chrek-pvc"` | Name of the PVC created by the chrek chart |
| dynamo-operator.checkpoint.storage.pvc.basePath | string | `"/checkpoints"` | Base path within the PVC for storing checkpoint tar files. Each checkpoint is stored as {basePath}/{identityHash}.tar |
| dynamo-operator.checkpoint.storage.pvc.basePath | string | `"/checkpoints"` | Base path within the PVC for storing checkpoints |
| dynamo-operator.checkpoint.storage.s3.uri | string | `""` | S3 URI in format: s3://[endpoint/]bucket/prefix. Examples: "s3://my-bucket/checkpoints" (AWS S3), "s3://minio.example.com/my-bucket/checkpoints" (MinIO) |
| dynamo-operator.checkpoint.storage.s3.uri | string | `""` | S3 URI in format: s3://[endpoint/]bucket/prefix |
| dynamo-operator.checkpoint.storage.s3.credentialsSecretRef | string | `""` | Reference to a secret containing AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, and optionally AWS_REGION. If not provided, uses IRSA/Workload Identity for authentication. |
| dynamo-operator.checkpoint.storage.s3.credentialsSecretRef | string | `""` | Reference to a secret containing AWS credentials |
| dynamo-operator.checkpoint.storage.oci.uri | string | `""` | OCI URI in format: oci://registry/repository. Examples: "oci://myregistry.io/checkpoints", "oci://ghcr.io/myorg/checkpoints" |
| dynamo-operator.checkpoint.storage.oci.uri | string | `""` | OCI URI in format: oci://registry/repository |
| dynamo-operator.checkpoint.storage.oci.credentialsSecretRef | string | `""` | Reference to a docker config secret for registry authentication |
| dynamo-operator.checkpoint.storage.oci.credentialsSecretRef | string | `""` | Reference to a docker config secret for registry authentication |
| dynamo-operator.checkpoint.agent.image.repository | string | `"nvcr.io/nvidia/ai-dynamo/checkpoint-agent"` | Container image repository for the checkpoint agent |
| dynamo-operator.checkpoint.agent.image.tag | string | `"latest"` | Container image tag for the checkpoint agent |
| dynamo-operator.checkpoint.agent.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for the checkpoint agent |
| dynamo-operator.checkpoint.agent.resources | object | `{"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"100m","memory":"128Mi"}}` | Resource limits and requests for checkpoint agent containers |
| dynamo-operator.checkpoint.agent.nodeSelector | object | `{}` | Node selector for checkpoint agent pods. Use this to restrict checkpoint agents to specific nodes (e.g., GPU nodes). |
| dynamo-operator.checkpoint.agent.tolerations | list | `[]` | Node tolerations for checkpoint agent pods |
| dynamo-operator.checkpoint.agent.podLabels | object | `{}` | Additional labels to add to checkpoint agent pods |
| dynamo-operator.checkpoint.agent.podAnnotations | object | `{}` | Additional annotations to add to checkpoint agent pods |
| dynamo-operator.checkpoint.agent.imagePullSecrets | list | `[]` | Image pull secrets for the checkpoint agent container image |
| dynamo-operator.checkpoint.agent.containerRuntimeSocket | string | `"/run/containerd/containerd.sock"` | Path to the container runtime socket. The checkpoint agent needs access to the container runtime to perform checkpoint operations. Change to /var/run/docker.sock for Docker runtime. |
| grove.enabled | bool | `false` | Whether to enable Grove for multi-node inference coordination. If enabled, the Grove operator will be deployed cluster-wide. |
| grove.enabled | bool | `false` | Whether to enable Grove for multi-node inference coordination. If enabled, the Grove operator will be deployed cluster-wide. |
| grove.tolerations | list | `[]` | Node tolerations for Grove pods |
| grove.tolerations | list | `[]` | Node tolerations for Grove pods |
| grove.affinity | object | `{}` | Affinity rules for Grove pods |
| grove.affinity | object | `{}` | Affinity for Grove pods |
| kai-scheduler.enabled | bool | `false` | Whether to enable Kai Scheduler for intelligent resource allocation. If enabled, the Kai Scheduler operator will be deployed cluster-wide. |
| kai-scheduler.enabled | bool | `false` | Whether to enable Kai Scheduler for intelligent resource allocation. If enabled, the Kai Scheduler operator will be deployed cluster-wide. |
| kai-scheduler.global.tolerations | list | `[]` | Node tolerations for kai-scheduler pods |
| kai-scheduler.global.tolerations | list | `[]` | Node tolerations for kai-scheduler pods |
| etcd.enabled | bool | `true` | Whether to enable the etcd deployment. Disable it if you want to use an external etcd instance. For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd (all etcd settings should be prefixed with "etcd.") |
| etcd.enabled | bool | `true` | Whether to enable the etcd deployment. Disable it if you want to use an external etcd instance. For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd (all etcd settings should be prefixed with "etcd.") |
| etcd.image.repository | string | `"bitnamilegacy/etcd"` | Following the Bitnami brownout announcement, we need to use the legacy repository until we migrate to the new "secure" repository. See: https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog |
| etcd.image.repository | string | `"bitnamilegacy/etcd"` | Following the Bitnami brownout announcement, we need to use the legacy repository until we migrate to the new "secure" repository. See: https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog |
| etcd.tolerations | list | `[]` | Node tolerations for etcd pods |
| nats.enabled | bool | `true` | Whether to enable the NATS deployment. Disable it if you want to use an external NATS instance. For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats (all NATS settings should be prefixed with "nats.") |
| nats.enabled | bool | `true` | Whether to enable the NATS deployment. Disable it if you want to use an external NATS instance. For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats (all NATS settings should be prefixed with "nats.") |
| nats.podTemplate.merge.spec.tolerations | list | `[]` | Node tolerations for NATS pods |