"lib/vscode:/vscode.git/clone" did not exist on "68fb3d95be436f431408a13831b5cf1aff3e22cd"
Unverified Commit 73b0cdb4, authored by Julien Mancuso, committed by GitHub
Browse files

fix: fix some defaults (#3417)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent d809906e
......@@ -113,7 +113,7 @@ The chart includes built-in validation to prevent all operator conflicts:
| dynamo-operator.controllerManager.manager.args[0] | string | `"--health-probe-bind-address=:8081"` | Health probe endpoint for Kubernetes health checks |
| dynamo-operator.controllerManager.manager.args[1] | string | `"--metrics-bind-address=127.0.0.1:8080"` | Metrics endpoint for Prometheus scraping (localhost only for security) |
| dynamo-operator.imagePullSecrets | list | `[]` | Secrets for pulling private container images |
| dynamo-operator.dynamo.groveTerminationDelay | string | `"15m"` | How long to wait before forcefully terminating Grove instances |
| dynamo-operator.dynamo.groveTerminationDelay | string | `"4h"` | How long to wait before forcefully terminating Grove instances |
| dynamo-operator.dynamo.internalImages.debugger | string | `"python:3.12-slim"` | Debugger image for troubleshooting deployments |
| dynamo-operator.dynamo.enableRestrictedSecurityContext | bool | `false` | Whether to enable restricted security contexts for enhanced security |
| dynamo-operator.dynamo.dockerRegistry.useKubernetesSecret | bool | `false` | Whether to use Kubernetes secrets for registry authentication |
......@@ -134,9 +134,9 @@ The chart includes built-in validation to prevent all operator conflicts:
| dynamo-operator.dynamo.mpiRun.sshKeygen.enabled | bool | `true` | Whether to enable SSH key generation for MPI Run |
| grove.enabled | bool | `false` | Whether to enable Grove for multi-node inference coordination, if enabled, the Grove operator will be deployed cluster-wide |
| kai-scheduler.enabled | bool | `false` | Whether to enable Kai Scheduler for intelligent resource allocation, if enabled, the Kai Scheduler operator will be deployed cluster-wide |
| etcd.enabled | bool | `true` | Whether to enable etcd deployment, disable if you want to use an external etcd instance |
| etcd.enabled | bool | `true` | Whether to enable etcd deployment, disable if you want to use an external etcd instance. For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd , all etcd settings should be prefixed with "etcd." |
| etcd.image.repository | string | `"bitnamilegacy/etcd"` | following bitnami announcement for brownout - https://github.com/bitnami/charts/tree/main/bitnami/etcd#%EF%B8%8F-important-notice-upcoming-changes-to-the-bitnami-catalog, we need to use the legacy repository until we migrate to the new "secure" repository |
| nats.enabled | bool | `true` | Whether to enable NATS deployment, disable if you want to use an external NATS instance |
| nats.enabled | bool | `true` | Whether to enable NATS deployment, disable if you want to use an external NATS instance. For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats , all nats settings should be prefixed with "nats." |
### NATS Configuration
......
......@@ -74,7 +74,7 @@ dynamo-operator:
# Core Dynamo platform configuration
dynamo:
# -- How long to wait before forcefully terminating Grove instances
groveTerminationDelay: 15m
groveTerminationDelay: 4h
# Internal utility images used by the platform
internalImages:
......@@ -147,10 +147,9 @@ kai-scheduler:
enabled: false
# etcd configuration - distributed key-value store for operator state
# For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd
etcd:
# -- Whether to enable etcd deployment, disable if you want to use an external etcd instance
# -- Whether to enable etcd deployment, disable if you want to use an external etcd instance. For complete configuration options, see: https://github.com/bitnami/charts/tree/main/bitnami/etcd , all etcd settings should be prefixed with "etcd."
enabled: true
image:
......@@ -195,9 +194,8 @@ etcd:
tolerations: []
# NATS configuration - messaging system for operator communication
# For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats
nats:
# -- Whether to enable NATS deployment, disable if you want to use an external NATS instance
# -- Whether to enable NATS deployment, disable if you want to use an external NATS instance. For complete configuration options, see: https://github.com/nats-io/k8s/tree/main/helm/charts/nats , all nats settings should be prefixed with "nats."
enabled: true
# TLS Certificate Authority configuration for secure communication
......@@ -338,7 +336,9 @@ nats:
# token: << $TOKEN >>
# jetstream:
# max_memory_store: << 1GB >>
merge: {}
merge:
# 10MB which allows for larger context size : The default NATS max payload size is 1MB, and 256K tokens (with tokens being int32 - 4 bytes each) tips over that 1MB max.
max_payload: 10485760
patch: []
############################################################
......
......@@ -887,7 +887,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
TimeoutSeconds: 5,
PeriodSeconds: 10,
SuccessThreshold: 0,
FailureThreshold: 60,
FailureThreshold: 720,
},
},
},
......
......@@ -67,7 +67,7 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
},
PeriodSeconds: 10,
TimeoutSeconds: 5,
FailureThreshold: 60,
FailureThreshold: 720, // 10s * 720 = 7200s = 2h
}
container.Env = append(container.Env, []corev1.EnvVar{
......
......@@ -1937,7 +1937,7 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
TimeoutSeconds: 5,
PeriodSeconds: 10,
SuccessThreshold: 0,
FailureThreshold: 60,
FailureThreshold: 720,
},
},
},
......@@ -4721,7 +4721,7 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
},
PeriodSeconds: 10,
TimeoutSeconds: 5,
FailureThreshold: 60,
FailureThreshold: 720,
},
Ports: []corev1.ContainerPort{
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment