Unverified commit e43b3050, authored by Julien Mancuso, committed by GitHub
Browse files

feat: move webhook certificate management and CA injection from Helm hooks...


feat: move webhook certificate management and CA injection from Helm hooks into the operator (#6839)
Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 2128d48a
...@@ -139,3 +139,6 @@ spec: ...@@ -139,3 +139,6 @@ spec:
secret: secret:
defaultMode: 420 defaultMode: 420
secretName: {{ .Values.webhook.certificateSecret.name }} secretName: {{ .Values.webhook.certificateSecret.name }}
# Allow the pod to start before the secret exists so the
# in-operator cert-controller can bootstrap it at runtime.
optional: true
...@@ -26,6 +26,10 @@ data: ...@@ -26,6 +26,10 @@ data:
server: server:
metrics: metrics:
bindAddress: "127.0.0.1" bindAddress: "127.0.0.1"
webhook:
certProvisionMode: {{ if or .Values.webhook.certManager.enabled .Values.webhook.certificateSecret.external }}"manual"{{ else }}"auto"{{ end }}
secretName: {{ .Values.webhook.certificateSecret.name | quote }}
serviceName: {{ include "dynamo-operator.fullname" . }}-webhook-service
leaderElection: leaderElection:
enabled: true enabled: true
id: {{ default "dynamo.nvidia.com" .Values.controllerManager.leaderElection.id | quote }} id: {{ default "dynamo.nvidia.com" .Values.controllerManager.leaderElection.id | quote }}
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if and (not .Values.webhook.certManager.enabled) (not .Values.webhook.certificateSecret.external) }}
---
# Identity used by the CA bundle injection hook Job.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-webhook-ca-inject
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    # Helm hook: created after install/upgrade at weight "1"; any previous
    # copy of this hook object is deleted before the new one is created.
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
---
# Namespaced Role granting read-only access to the webhook certificate
# secret, and to that secret only (restricted through resourceNames).
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-webhook-ca-inject
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
rules:
  - apiGroups:
      - ""
    resources:
      - secrets
    resourceNames:
      - "{{ .Values.webhook.certificateSecret.name }}"
    verbs:
      - get
---
{{- $caInjectRbacName := printf "%s-webhook-ca-inject" (include "dynamo-operator.fullname" .) }}
# Grants the webhook-ca-inject Role to the ServiceAccount of the same name.
# The Role, the RoleBinding and the ServiceAccount all share one name, so it
# is computed once above and reused.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ $caInjectRbacName }}
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ $caInjectRbacName }}
subjects:
  - kind: ServiceAccount
    name: {{ $caInjectRbacName }}
    namespace: {{ .Release.Namespace }}
---
{{- $webhookCfgSuffix := "" }}
{{- if .Values.namespaceRestriction.enabled }}
{{- $webhookCfgSuffix = printf "-%s" .Release.Namespace }}
{{- end }}
# ClusterRole allowing get/patch on exactly three cluster-scoped objects:
# this release's ValidatingWebhookConfiguration, its
# MutatingWebhookConfiguration, and the DynamoGraphDeploymentRequest CRD.
# In namespace-restricted mode the webhook configuration names carry the
# release namespace as a suffix; otherwise the suffix is empty.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-webhook-ca-inject
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
rules:
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - validatingwebhookconfigurations
    resourceNames:
      - "{{ include "dynamo-operator.fullname" . }}-validating{{ $webhookCfgSuffix }}"
    verbs:
      - get
      - patch
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - mutatingwebhookconfigurations
    resourceNames:
      - "{{ include "dynamo-operator.fullname" . }}-mutating{{ $webhookCfgSuffix }}"
    verbs:
      - get
      - patch
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    resourceNames:
      - dynamographdeploymentrequests.nvidia.com
    verbs:
      - get
      - patch
---
{{- $caInjectClusterRoleName := printf "%s-%s-webhook-ca-inject" (include "dynamo-operator.fullname" .) .Release.Namespace }}
# Grants the webhook-ca-inject ClusterRole to the webhook-ca-inject
# ServiceAccount in the release namespace. The cluster-scoped name embeds
# the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $caInjectClusterRoleName }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $caInjectClusterRoleName }}
subjects:
  - kind: ServiceAccount
    name: {{ include "dynamo-operator.fullname" . }}-webhook-ca-inject
    namespace: {{ .Release.Namespace }}
---
# Post-install/post-upgrade hook Job that copies the CA bundle stored in the
# webhook certificate Secret into every object that must trust it: the
# ValidatingWebhookConfiguration, the MutatingWebhookConfiguration and the
# conversion webhook of the DynamoGraphDeploymentRequest CRD.
# The Job name includes the release revision, and hook weight "2" is higher
# than the weight "1" used by the other webhook-ca-inject hook objects.
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "dynamo-operator.fullname" . }}-webhook-ca-inject-{{ .Release.Revision }}
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "2"
"helm.sh/hook-delete-policy": before-hook-creation
spec:
backoffLimit: 5
template:
metadata:
name: {{ include "dynamo-operator.fullname" . }}-webhook-ca-inject
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 8 }}
spec:
serviceAccountName: {{ include "dynamo-operator.fullname" . }}-webhook-ca-inject
restartPolicy: OnFailure
# Scheduling constraints are taken from the controller-manager values.
{{- with .Values.controllerManager.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.controllerManager.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: ca-injector
image: {{ .Values.webhook.certGenerator.image.repository }}:{{ .Values.webhook.certGenerator.image.tag }}
imagePullPolicy: {{ .Values.webhook.certGenerator.image.pullPolicy }}
# NOTE(review): OPENSSL_CONF points at /tmp/openssl.cnf, but the script
# below neither creates that file nor invokes openssl - confirm whether
# this variable is needed in this Job.
env:
- name: OPENSSL_CONF
value: /tmp/openssl.cnf
# The script below:
#   1. polls for the certificate secret, 60 attempts with 5 second sleeps;
#   2. reads the ca.crt entry of the secret as stored (still base64-encoded);
#   3. JSON-patches caBundle into validating webhooks 0-3 and mutating
#      webhooks 0-1;
#   4. merge-patches spec.conversion of the DGDR CRD, including caBundle.
# NOTE(review): the webhook indices are hard-coded. An "add" on an index
# that does not exist makes the patch fail, and entries beyond these
# indices are not patched - confirm the counts match the webhook
# configuration templates.
command:
- /bin/bash
- -c
- |
set -e
echo "🔐 Injecting CA bundle into webhook configuration..."
# Configuration
SECRET_NAME="{{ .Values.webhook.certificateSecret.name }}"
NAMESPACE="{{ .Release.Namespace }}"
{{- if .Values.namespaceRestriction.enabled }}
VALIDATING_WEBHOOK_NAME="{{ include "dynamo-operator.fullname" . }}-validating-{{ .Release.Namespace }}"
MUTATING_WEBHOOK_NAME="{{ include "dynamo-operator.fullname" . }}-mutating-{{ .Release.Namespace }}"
{{- else }}
VALIDATING_WEBHOOK_NAME="{{ include "dynamo-operator.fullname" . }}-validating"
MUTATING_WEBHOOK_NAME="{{ include "dynamo-operator.fullname" . }}-mutating"
{{- end }}
echo "⏳ Waiting for certificate secret to be available..."
# Wait for secret (up to 5 minutes)
for i in $(seq 1 60); do
if kubectl get secret ${SECRET_NAME} -n ${NAMESPACE} >/dev/null 2>&1; then
echo "✅ Secret found!"
break
fi
if [ $i -eq 60 ]; then
echo "❌ ERROR: Secret ${SECRET_NAME} not found after 5 minutes"
exit 1
fi
echo " Waiting for secret... ($i/60)"
sleep 5
done
echo "📝 Extracting CA bundle from secret..."
CA_BUNDLE=$(kubectl get secret ${SECRET_NAME} \
-n ${NAMESPACE} \
-o jsonpath='{.data.ca\.crt}')
if [ -z "$CA_BUNDLE" ]; then
echo "❌ ERROR: ca.crt not found in secret ${SECRET_NAME}"
exit 1
fi
echo "📝 Patching ValidatingWebhookConfiguration..."
# Patch all validating webhooks (DynamoComponentDeployment, DynamoGraphDeployment, DynamoModel, DynamoGraphDeploymentRequest)
kubectl patch validatingwebhookconfiguration ${VALIDATING_WEBHOOK_NAME} \
--type='json' -p="[
{
\"op\": \"add\",
\"path\": \"/webhooks/0/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
},
{
\"op\": \"add\",
\"path\": \"/webhooks/1/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
},
{
\"op\": \"add\",
\"path\": \"/webhooks/2/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
},
{
\"op\": \"add\",
\"path\": \"/webhooks/3/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
}
]"
echo "📝 Patching MutatingWebhookConfiguration..."
# Patch all mutating webhooks:
# 0: mdynamographdeployment.kb.io (DGD defaulting)
# 1: mdynamographdeploymentrequestv1beta1.kb.io (DGDR defaulting)
kubectl patch mutatingwebhookconfiguration ${MUTATING_WEBHOOK_NAME} \
--type='json' -p="[
{
\"op\": \"add\",
\"path\": \"/webhooks/0/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
},
{
\"op\": \"add\",
\"path\": \"/webhooks/1/clientConfig/caBundle\",
\"value\": \"${CA_BUNDLE}\"
}
]"
echo "📝 Patching DGDR CRD spec.conversion..."
# Set the full conversion block (strategy + service + caBundle) in one merge patch.
# This is idempotent: safe to run on install and upgrade.
kubectl patch crd dynamographdeploymentrequests.nvidia.com \
--type merge --patch "{
\"spec\": {
\"conversion\": {
\"strategy\": \"Webhook\",
\"webhook\": {
\"clientConfig\": {
\"service\": {
\"name\": \"{{ include "dynamo-operator.fullname" . }}-webhook-service\",
\"namespace\": \"{{ .Release.Namespace }}\",
\"path\": \"/convert\"
},
\"caBundle\": \"${CA_BUNDLE}\"
},
\"conversionReviewVersions\": [\"v1\"]
}
}
}
}"
echo "✅ CA bundle injected successfully!"
echo "🎉 Webhook configuration complete!"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
{{- end }}
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if and (not .Values.webhook.certManager.enabled) (not .Values.webhook.certificateSecret.external) }}
---
# Identity used by the certificate generation hook Job.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-gen
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    # Helm hook: created before install/upgrade at weight "-5"; any previous
    # copy of this hook object is deleted before the new one is created.
    helm.sh/hook: pre-install,pre-upgrade
    helm.sh/hook-weight: "-5"
    helm.sh/hook-delete-policy: before-hook-creation
---
# Role that lets the certificate generation Job create and maintain the
# webhook certificate secret.
# Access is least-privilege: read and modify verbs are limited to the
# certificate secret itself instead of every Secret in the namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-gen
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade
    "helm.sh/hook-weight": "-5"
    "helm.sh/hook-delete-policy": before-hook-creation
rules:
  # get/update/patch only on the webhook certificate secret.
  - apiGroups: [""]
    resources: ["secrets"]
    resourceNames: ["{{ .Values.webhook.certificateSecret.name }}"]
    verbs: ["get", "update", "patch"]
  # "create" cannot be restricted by resourceNames (the object name is not
  # known at authorization time), so it is granted in a separate rule.
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create"]
---
{{- $certGenRbacName := printf "%s-webhook-cert-gen" (include "dynamo-operator.fullname" .) }}
# Grants the webhook-cert-gen Role to the ServiceAccount of the same name.
# The Role, the RoleBinding and the ServiceAccount all share one name, so it
# is computed once above and reused.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: {{ $certGenRbacName }}
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: pre-install,pre-upgrade
    helm.sh/hook-weight: "-5"
    helm.sh/hook-delete-policy: before-hook-creation
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ $certGenRbacName }}
subjects:
  - kind: ServiceAccount
    name: {{ $certGenRbacName }}
    namespace: {{ .Release.Namespace }}
---
# Pre-install/pre-upgrade hook Job that makes sure the webhook TLS secret
# exists and is usable before the chart's regular resources are applied.
# The script reuses an existing certificate when it is valid for at least
# 30 more days and its SANs contain the webhook service DNS name; otherwise
# it generates a new self-signed CA plus a server certificate and stores
# tls.crt, tls.key and ca.crt in the secret.
# The Job name includes the release revision, and hook weight "-4" is higher
# than the weight "-5" used by the other webhook-cert-gen hook objects.
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-gen-{{ .Release.Revision }}
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade
    "helm.sh/hook-weight": "-4"
    "helm.sh/hook-delete-policy": before-hook-creation
spec:
  backoffLimit: 3
  template:
    metadata:
      name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-gen
      labels:
        app.kubernetes.io/component: webhook
        app.kubernetes.io/created-by: dynamo-operator
        app.kubernetes.io/part-of: dynamo-operator
        {{- include "dynamo-operator.labels" . | nindent 8 }}
    spec:
      serviceAccountName: {{ include "dynamo-operator.fullname" . }}-webhook-cert-gen
      restartPolicy: OnFailure
      # Scheduling constraints are taken from the controller-manager values.
      {{- with .Values.controllerManager.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.controllerManager.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      containers:
        - name: cert-generator
          image: {{ .Values.webhook.certGenerator.image.repository }}:{{ .Values.webhook.certGenerator.image.tag }}
          imagePullPolicy: {{ .Values.webhook.certGenerator.image.pullPolicy }}
          env:
            # The script writes this file first, so every openssl call below
            # uses the config that activates only the default provider.
            - name: OPENSSL_CONF
              value: /tmp/openssl.cnf
          command:
            - /bin/bash
            - -c
            - |
              set -e
              echo "🔐 Generating webhook certificates..."
              # Create OpenSSL config without FIPS
              cat > /tmp/openssl.cnf <<'OPENSSLEOF'
              openssl_conf = openssl_init
              [openssl_init]
              providers = provider_sect
              [provider_sect]
              default = default_sect
              [default_sect]
              activate = 1
              OPENSSLEOF
              # Configuration
              SERVICE_NAME="{{ include "dynamo-operator.fullname" . }}-webhook-service"
              NAMESPACE="{{ .Release.Namespace }}"
              SECRET_NAME="{{ .Values.webhook.certificateSecret.name }}"
              CERT_VALIDITY_DAYS="{{ .Values.webhook.certificateValidity }}"
              # Check if valid certificates already exist
              echo "🔍 Checking if valid certificates already exist..."
              NEED_NEW_CERTS=false
              if kubectl get secret "${SECRET_NAME}" -n "${NAMESPACE}" >/dev/null 2>&1; then
                echo "Secret exists, checking certificate validity..."
                # Extract and verify the certificate
                mkdir -p /tmp/verify
                kubectl get secret "${SECRET_NAME}" -n "${NAMESPACE}" -o jsonpath='{.data.tls\.crt}' | base64 -d > /tmp/verify/tls.crt
                kubectl get secret "${SECRET_NAME}" -n "${NAMESPACE}" -o jsonpath='{.data.ca\.crt}' | base64 -d > /tmp/verify/ca.crt
                # Check if certificate is valid for at least 30 more days
                if openssl x509 -checkend 2592000 -noout -in /tmp/verify/tls.crt 2>/dev/null; then
                  echo "✅ Certificate is valid for at least 30 more days"
                  # Verify SANs match the service name
                  CERT_SANS=$(openssl x509 -in /tmp/verify/tls.crt -noout -text 2>/dev/null | grep -A1 "Subject Alternative Name" | tail -1)
                  if echo "$CERT_SANS" | grep -q "${SERVICE_NAME}.${NAMESPACE}.svc"; then
                    echo "✅ Certificate has correct SANs"
                    echo "🎉 Existing certificates are valid, skipping generation"
                  else
                    echo "⚠️ Certificate SANs don't match, need to regenerate"
                    NEED_NEW_CERTS=true
                  fi
                else
                  echo "⚠️ Certificate expires soon or is invalid, need to regenerate"
                  NEED_NEW_CERTS=true
                fi
                rm -rf /tmp/verify
              else
                echo "Secret doesn't exist, need to generate certificates"
                NEED_NEW_CERTS=true
              fi
              # Only generate certificates if needed
              if [ "$NEED_NEW_CERTS" = false ]; then
                echo "✅ Using existing valid certificates"
                exit 0
              fi
              echo "🔐 Generating new webhook certificates..."
              # Create working directory
              mkdir -p /tmp/certs
              cd /tmp/certs
              echo "📝 Generating CA key and certificate..."
              # Generate CA key (4096-bit RSA)
              openssl genrsa -out ca.key 4096
              # Generate self-signed CA certificate
              cat > ca.cnf <<EOF
              [req]
              prompt = no
              distinguished_name = dn
              x509_extensions = v3_ca
              [dn]
              CN = Dynamo Webhook CA
              O = NVIDIA
              [v3_ca]
              basicConstraints = critical,CA:TRUE
              keyUsage = critical,digitalSignature,keyCertSign,cRLSign
              subjectKeyIdentifier = hash
              EOF
              openssl req -x509 -new -nodes \
                -key ca.key \
                -sha256 \
                -days ${CERT_VALIDITY_DAYS} \
                -out ca.crt \
                -config ca.cnf
              echo "📝 Generating server key and certificate..."
              # Generate server key (4096-bit RSA)
              openssl genrsa -out tls.key 4096
              # Generate server certificate signing request with SANs
              cat > server.cnf <<EOF
              [req]
              prompt = no
              distinguished_name = dn
              req_extensions = v3_req
              [dn]
              CN = ${SERVICE_NAME}
              O = NVIDIA
              [v3_req]
              basicConstraints = CA:FALSE
              keyUsage = critical,digitalSignature,keyEncipherment
              extendedKeyUsage = serverAuth
              subjectAltName = @alt_names
              [alt_names]
              DNS.1 = ${SERVICE_NAME}
              DNS.2 = ${SERVICE_NAME}.${NAMESPACE}
              DNS.3 = ${SERVICE_NAME}.${NAMESPACE}.svc
              DNS.4 = ${SERVICE_NAME}.${NAMESPACE}.svc.{{ .Values.kubernetesClusterDomain }}
              EOF
              openssl req -new -key tls.key -out server.csr -config server.cnf
              # Sign server certificate with CA
              openssl x509 -req \
                -in server.csr \
                -CA ca.crt \
                -CAkey ca.key \
                -CAcreateserial \
                -out tls.crt \
                -days ${CERT_VALIDITY_DAYS} \
                -extensions v3_req \
                -extfile server.cnf
              echo "✅ Certificates generated successfully!"
              echo "📦 Creating Kubernetes secret..."
              # Check if secret exists
              if kubectl get secret "${SECRET_NAME}" -n "${NAMESPACE}" >/dev/null 2>&1; then
                echo "Secret exists, updating..."
                # Target the namespace explicitly on the update path as well,
                # so both branches write the secret to the namespace that was
                # checked above instead of relying on kubectl's default.
                kubectl create secret generic "${SECRET_NAME}" \
                  --from-file=tls.crt=tls.crt \
                  --from-file=tls.key=tls.key \
                  --from-file=ca.crt=ca.crt \
                  -n "${NAMESPACE}" \
                  --dry-run=client -o yaml | \
                  kubectl apply -n "${NAMESPACE}" -f -
              else
                echo "Creating new secret..."
                kubectl create secret generic "${SECRET_NAME}" \
                  --from-file=tls.crt=tls.crt \
                  --from-file=tls.key=tls.key \
                  --from-file=ca.crt=ca.crt \
                  -n "${NAMESPACE}"
              fi
              echo "✅ Secret '${SECRET_NAME}' created successfully!"
              echo "🎉 Webhook certificate generation complete!"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            # The script writes its OpenSSL config and key material under
            # /tmp, so the root filesystem is left writable.
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 1001
{{- end }}
...@@ -26,6 +26,7 @@ metadata: ...@@ -26,6 +26,7 @@ metadata:
app.kubernetes.io/component: webhook app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator app.kubernetes.io/part-of: dynamo-operator
nvidia.com/dynamo-operator-namespace: {{ .Release.Namespace }}
{{- include "dynamo-operator.labels" . | nindent 4 }} {{- include "dynamo-operator.labels" . | nindent 4 }}
{{- if .Values.webhook.certManager.enabled }} {{- if .Values.webhook.certManager.enabled }}
annotations: annotations:
...@@ -181,6 +182,7 @@ metadata: ...@@ -181,6 +182,7 @@ metadata:
app.kubernetes.io/component: webhook app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator app.kubernetes.io/part-of: dynamo-operator
nvidia.com/dynamo-operator-namespace: {{ .Release.Namespace }}
{{- include "dynamo-operator.labels" . | nindent 4 }} {{- include "dynamo-operator.labels" . | nindent 4 }}
{{- if .Values.webhook.certManager.enabled }} {{- if .Values.webhook.certManager.enabled }}
annotations: annotations:
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .Values.webhook.certManager.enabled }}
---
# Identity used by the CRD conversion patch hook Job.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
  namespace: {{ .Release.Namespace }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    # Helm hook: created after install/upgrade at weight "1"; any previous
    # copy of this hook object is deleted before the new one is created.
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
---
# ClusterRole allowing get/patch on a single CRD: the
# DynamoGraphDeploymentRequest definition.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-crd-conversion-patch
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
rules:
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    resourceNames:
      - dynamographdeploymentrequests.nvidia.com
    verbs:
      - get
      - patch
---
{{- $crdPatchClusterRoleName := printf "%s-%s-crd-conversion-patch" (include "dynamo-operator.fullname" .) .Release.Namespace }}
# Grants the crd-conversion-patch ClusterRole to the crd-conversion-patch
# ServiceAccount in the release namespace. The cluster-scoped name embeds
# the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $crdPatchClusterRoleName }}
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
  annotations:
    helm.sh/hook: post-install,post-upgrade
    helm.sh/hook-weight: "1"
    helm.sh/hook-delete-policy: before-hook-creation
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $crdPatchClusterRoleName }}
subjects:
  - kind: ServiceAccount
    name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
    namespace: {{ .Release.Namespace }}
---
# Job to patch the DGDR CRD with spec.conversion and cert-manager CA injection annotation.
# The cert-manager ca-injector then watches the CRD annotation and keeps the caBundle
# in spec.conversion.webhook.clientConfig.caBundle up to date automatically.
# Runs as a post-install/post-upgrade hook. The Job name includes the release
# revision, and hook weight "2" is higher than the weight "1" used by the
# other crd-conversion-patch hook objects.
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch-{{ .Release.Revision }}
namespace: {{ .Release.Namespace }}
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-weight": "2"
"helm.sh/hook-delete-policy": before-hook-creation
spec:
backoffLimit: 5
template:
metadata:
name: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/created-by: dynamo-operator
app.kubernetes.io/part-of: dynamo-operator
{{- include "dynamo-operator.labels" . | nindent 8 }}
spec:
serviceAccountName: {{ include "dynamo-operator.fullname" . }}-crd-conversion-patch
restartPolicy: OnFailure
# Scheduling constraints are taken from the controller-manager values.
{{- with .Values.controllerManager.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.controllerManager.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
- name: crd-conversion-patch
# Uses the same image values as the certificate generator.
image: {{ .Values.webhook.certGenerator.image.repository }}:{{ .Values.webhook.certGenerator.image.tag }}
imagePullPolicy: {{ .Values.webhook.certGenerator.image.pullPolicy }}
# The script below performs two kubectl calls against the DGDR CRD:
#   1. a merge patch that sets spec.conversion to the Webhook strategy,
#      pointing at this release's webhook service on path /convert;
#   2. an annotate --overwrite that sets cert-manager.io/inject-ca-from to
#      "<release namespace>/<fullname>-serving-cert".
# NOTE(review): the annotation presumably references the cert-manager
# Certificate defined elsewhere in the chart - confirm the name matches.
command:
- /bin/bash
- -c
- |
set -e
echo "📝 Patching DGDR CRD with spec.conversion and cert-manager CA injection annotation..."
# Set spec.conversion (caBundle left empty — cert-manager ca-injector fills it in
# automatically once it sees the cert-manager.io/inject-ca-from annotation below).
kubectl patch crd dynamographdeploymentrequests.nvidia.com \
--type merge --patch '{
"spec": {
"conversion": {
"strategy": "Webhook",
"webhook": {
"clientConfig": {
"service": {
"name": "{{ include "dynamo-operator.fullname" . }}-webhook-service",
"namespace": "{{ .Release.Namespace }}",
"path": "/convert"
}
},
"conversionReviewVersions": ["v1"]
}
}
}
}'
# Add the cert-manager annotation so the ca-injector keeps the caBundle current.
kubectl annotate crd dynamographdeploymentrequests.nvidia.com \
--overwrite \
cert-manager.io/inject-ca-from="{{ .Release.Namespace }}/{{ include "dynamo-operator.fullname" . }}-serving-cert"
echo "✅ DGDR CRD conversion webhook configured."
echo " cert-manager ca-injector will populate the caBundle automatically."
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1001
{{- end }}
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
# limitations under the License. # limitations under the License.
--- ---
# Role to read the webhook certificate secret # Role to manage the webhook certificate secret (create placeholder, read, update)
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: Role kind: Role
metadata: metadata:
name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-reader name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-manager
namespace: {{ .Release.Namespace }} namespace: {{ .Release.Namespace }}
labels: labels:
app.kubernetes.io/component: webhook app.kubernetes.io/component: webhook
...@@ -36,11 +36,19 @@ rules: ...@@ -36,11 +36,19 @@ rules:
- get - get
- list - list
- watch - watch
- update
- patch
- apiGroups:
- ""
resources:
- secrets
verbs:
- create
--- ---
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding kind: RoleBinding
metadata: metadata:
name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-reader name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-manager
namespace: {{ .Release.Namespace }} namespace: {{ .Release.Namespace }}
labels: labels:
app.kubernetes.io/component: webhook app.kubernetes.io/component: webhook
...@@ -50,9 +58,63 @@ metadata: ...@@ -50,9 +58,63 @@ metadata:
roleRef: roleRef:
apiGroup: rbac.authorization.k8s.io apiGroup: rbac.authorization.k8s.io
kind: Role kind: Role
name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-reader name: {{ include "dynamo-operator.fullname" . }}-webhook-cert-manager
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ include "dynamo-operator.fullname" . }}-controller-manager name: {{ include "dynamo-operator.fullname" . }}-controller-manager
namespace: {{ .Release.Namespace }} namespace: {{ .Release.Namespace }}
{{- if .Values.namespaceRestriction.enabled }}
---
# Cluster-scoped permissions the operator needs to patch webhook
# configurations and CRDs. Rendered only in namespace-restricted mode; in
# cluster-wide mode the manager ClusterRole already has full access.
# NOTE(review): neither rule is limited by resourceNames, so the verbs apply
# to every webhook configuration and every CRD in the cluster - confirm this
# breadth is required.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-{{ .Release.Namespace }}-webhook-injector
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
rules:
  # Validating and mutating admission webhook configurations.
  - apiGroups: ["admissionregistration.k8s.io"]
    resources: ["validatingwebhookconfigurations", "mutatingwebhookconfigurations"]
    verbs: ["get", "list", "watch", "update", "patch"]
  # Custom resource definitions.
  - apiGroups: ["apiextensions.k8s.io"]
    resources: ["customresourcedefinitions"]
    verbs: ["get", "list", "watch", "update", "patch"]
---
{{- $webhookInjectorRoleName := printf "%s-%s-webhook-injector" (include "dynamo-operator.fullname" .) .Release.Namespace }}
# Grants the webhook-injector ClusterRole to the operator's
# controller-manager ServiceAccount in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ $webhookInjectorRoleName }}-binding
  labels:
    app.kubernetes.io/component: webhook
    app.kubernetes.io/created-by: dynamo-operator
    app.kubernetes.io/part-of: dynamo-operator
    {{- include "dynamo-operator.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ $webhookInjectorRoleName }}
subjects:
  - kind: ServiceAccount
    name: {{ include "dynamo-operator.fullname" . }}-controller-manager
    namespace: {{ .Release.Namespace }}
{{- end }}
...@@ -210,29 +210,20 @@ checkpoint: ...@@ -210,29 +210,20 @@ checkpoint:
webhook: webhook:
# Certificate configuration # Certificate configuration
certificateSecret: certificateSecret:
# Name of the secret containing webhook TLS certificates # Name of the Kubernetes Secret holding webhook TLS certificates.
# The secret must contain: tls.crt, tls.key, and ca.crt # In auto mode, the operator creates and manages this secret at runtime.
# In cert-manager mode, cert-manager provisions it.
# In external mode, you must create and populate it yourself.
name: webhook-server-cert name: webhook-server-cert
# Set to true if you're managing the certificate secret externally # Set to true if you're managing the certificate secret externally
# When false (default), certificates are auto-generated via Helm hooks # (e.g., provisioned by an admin or another pipeline).
# When true, the operator operates in manual cert mode and expects
# the secret to exist with tls.crt, tls.key, and ca.crt.
external: false external: false
# Certificate validity duration (in days) for auto-generated certificates
# Only used when certManager.enabled=false and certificateSecret.external=false
certificateValidity: 365
# Container image for certificate generation and CA injection jobs
# Only used when certManager.enabled=false and certificateSecret.external=false
certGenerator:
image:
repository: bitnami/kubectl
tag: latest
pullPolicy: IfNotPresent
# CA bundle for webhook (base64 encoded) # CA bundle for webhook (base64 encoded)
# Only used when certificateSecret.external=true # Only used when certificateSecret.external=true and cert-manager is not enabled.
# For automatic mode or cert-manager, leave empty
caBundle: "" caBundle: ""
# Webhook failure policy # Webhook failure policy
...@@ -256,11 +247,12 @@ webhook: ...@@ -256,11 +247,12 @@ webhook:
# For NAMESPACE-RESTRICTED operators: Leave empty (auto-configured) # For NAMESPACE-RESTRICTED operators: Leave empty (auto-configured)
namespaceSelector: {} namespaceSelector: {}
# cert-manager integration (optional) # cert-manager integration (optional, alternative to built-in cert-controller)
# When enabled, the operator runs in manual cert mode and relies on
# cert-manager to provision and rotate the webhook TLS secret.
certManager: certManager:
# Enable cert-manager for automatic certificate management # Enable cert-manager for certificate management
# Requires cert-manager to be installed in the cluster # Requires cert-manager to be installed in the cluster
# When enabled, disables automatic certificate generation
enabled: false enabled: false
# Certificate configuration for cert-manager # Certificate configuration for cert-manager
......
...@@ -50,6 +50,12 @@ func SetDefaultsOperatorConfiguration(obj *OperatorConfiguration) { ...@@ -50,6 +50,12 @@ func SetDefaultsOperatorConfiguration(obj *OperatorConfiguration) {
if obj.Server.Webhook.CertDir == "" { if obj.Server.Webhook.CertDir == "" {
obj.Server.Webhook.CertDir = "/tmp/k8s-webhook-server/serving-certs" obj.Server.Webhook.CertDir = "/tmp/k8s-webhook-server/serving-certs"
} }
if obj.Server.Webhook.CertProvisionMode == "" {
obj.Server.Webhook.CertProvisionMode = CertProvisionModeAuto
}
if obj.Server.Webhook.SecretName == "" {
obj.Server.Webhook.SecretName = "webhook-server-cert"
}
// Orchestrator defaults // Orchestrator defaults
if obj.Orchestrators.Grove.TerminationDelay.Duration == 0 { if obj.Orchestrators.Grove.TerminationDelay.Duration == 0 {
......
...@@ -102,6 +102,16 @@ type MetricsServer struct { ...@@ -102,6 +102,16 @@ type MetricsServer struct {
Secure bool `json:"secure"` Secure bool `json:"secure"`
} }
// CertProvisionMode controls how webhook TLS certificates are managed.
// It is a string type so the value can be written as-is in the operator
// configuration, where WebhookServer serializes it as "certProvisionMode".
type CertProvisionMode string

const (
	// CertProvisionModeAuto uses the built-in cert-controller to generate and rotate certificates.
	// SetDefaultsOperatorConfiguration selects this mode when the field is left empty.
	CertProvisionModeAuto CertProvisionMode = "auto"
	// CertProvisionModeManual expects certificates to be provided externally (e.g., cert-manager, admin).
	CertProvisionModeManual CertProvisionMode = "manual"
)
// WebhookServer extends Server with host and certificate directory. // WebhookServer extends Server with host and certificate directory.
type WebhookServer struct { type WebhookServer struct {
Server `json:",inline"` Server `json:",inline"`
...@@ -109,6 +119,15 @@ type WebhookServer struct { ...@@ -109,6 +119,15 @@ type WebhookServer struct {
Host string `json:"host"` Host string `json:"host"`
// CertDir is the directory containing TLS certificates // CertDir is the directory containing TLS certificates
CertDir string `json:"certDir"` CertDir string `json:"certDir"`
// CertProvisionMode controls certificate management: "auto" (built-in cert-controller) or "manual" (external)
// +kubebuilder:default="auto"
CertProvisionMode CertProvisionMode `json:"certProvisionMode"`
// SecretName is the name of the Kubernetes Secret holding webhook TLS certificates
// +kubebuilder:default="webhook-server-cert"
SecretName string `json:"secretName"`
// ServiceName is the name of the Kubernetes Service fronting the webhook server.
// Used to generate certificate SANs. Set by the Helm chart.
ServiceName string `json:"serviceName"`
} }
// LeaderElectionConfiguration holds leader election settings. // LeaderElectionConfiguration holds leader election settings.
......
...@@ -24,12 +24,14 @@ import ( ...@@ -24,12 +24,14 @@ import (
"crypto/tls" "crypto/tls"
"flag" "flag"
"fmt" "fmt"
"net/http"
"os" "os"
"time" "time"
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them. // to ensure that exec-entrypoint and run can make use of them.
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/client-go/discovery/cached/memory" "k8s.io/client-go/discovery/cached/memory"
...@@ -42,6 +44,7 @@ import ( ...@@ -42,6 +44,7 @@ import (
k8sCache "k8s.io/client-go/tools/cache" k8sCache "k8s.io/client-go/tools/cache"
"k8s.io/utils/ptr" "k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
k8sruntime "k8s.io/apimachinery/pkg/runtime" k8sruntime "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/runtime/serializer"
...@@ -61,6 +64,7 @@ import ( ...@@ -61,6 +64,7 @@ import (
configvalidation "github.com/ai-dynamo/dynamo/deploy/operator/api/config/validation" configvalidation "github.com/ai-dynamo/dynamo/deploy/operator/api/config/validation"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1" nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1"
nvidiacomv1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1" nvidiacomv1beta1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1beta1"
internalcert "github.com/ai-dynamo/dynamo/deploy/operator/internal/cert"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/controller" "github.com/ai-dynamo/dynamo/deploy/operator/internal/controller"
commonController "github.com/ai-dynamo/dynamo/deploy/operator/internal/controller_common" commonController "github.com/ai-dynamo/dynamo/deploy/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/modelendpoint" "github.com/ai-dynamo/dynamo/deploy/operator/internal/modelendpoint"
...@@ -149,6 +153,8 @@ func initCRDSchemes() { ...@@ -149,6 +153,8 @@ func initCRDSchemes() {
utilruntime.Must(apiextensionsv1.AddToScheme(crdScheme)) utilruntime.Must(apiextensionsv1.AddToScheme(crdScheme))
utilruntime.Must(admissionregistrationv1.AddToScheme(crdScheme))
utilruntime.Must(istioclientsetscheme.AddToScheme(crdScheme)) utilruntime.Must(istioclientsetscheme.AddToScheme(crdScheme))
utilruntime.Must(gaiev1.Install(crdScheme)) utilruntime.Must(gaiev1.Install(crdScheme))
...@@ -263,6 +269,23 @@ func main() { ...@@ -263,6 +269,23 @@ func main() {
setupLog.Info("Initializing observability metrics") setupLog.Info("Initializing observability metrics")
observability.InitMetrics() observability.InitMetrics()
// Set up webhook certificate management.
// A direct (non-cached) client is needed because the manager's cache isn't started yet.
directClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: crdScheme})
if err != nil {
setupLog.Error(err, "unable to create direct client for cert management")
os.Exit(1)
}
certMgr, err := internalcert.NewCertManager(directClient, &operatorCfg.Server.Webhook)
if err != nil {
setupLog.Error(err, "unable to create cert manager")
os.Exit(1)
}
if err = certMgr.Setup(mainCtx, mgr); err != nil {
setupLog.Error(err, "failed to setup webhook certificate management")
os.Exit(1)
}
// Initialize namespace scope mechanism // Initialize namespace scope mechanism
var leaseManager *namespace_scope.LeaseManager var leaseManager *namespace_scope.LeaseManager
var leaseWatcher *namespace_scope.LeaseWatcher var leaseWatcher *namespace_scope.LeaseWatcher
...@@ -505,24 +528,86 @@ func main() { ...@@ -505,24 +528,86 @@ func main() {
operatorCfg.MPI.SSHSecretName, operatorCfg.MPI.SSHSecretName,
) )
if err = (&controller.DynamoComponentDeploymentReconciler{ if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
webhooksReady := make(chan struct{})
if err := mgr.AddReadyzCheck("readyz", func(req *http.Request) error {
select {
case <-webhooksReady:
return nil
default:
return fmt.Errorf("webhook handlers not yet registered")
}
}); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
}
// Register controllers synchronously before mgr.Start().
// Controllers don't depend on TLS certificates.
if err := registerControllers(
mgr, operatorCfg, runtimeConfig,
dockerSecretRetriever, mpiSecretReplicator,
); err != nil {
setupLog.Error(err, "failed to register controllers")
os.Exit(1)
}
// Webhooks require TLS certificates to serve HTTPS. Register them in a
// goroutine that blocks until the cert-controller has written the certs.
go func() {
certMgr.WaitReady()
if operatorCfg.Server.Webhook.CertProvisionMode == configv1alpha1.CertProvisionModeAuto {
injector, err := internalcert.NewCABundleInjector(mgr.GetClient(), operatorCfg)
if err != nil {
setupLog.Error(err, "unable to create CA bundle injector")
os.Exit(1)
}
if err := injector.InjectAll(mainCtx); err != nil {
setupLog.Error(err, "failed to inject CA bundles into webhook configurations")
os.Exit(1)
}
}
if err := registerWebhooks(mgr, operatorCfg, runtimeConfig, operatorVersion); err != nil {
setupLog.Error(err, "failed to register webhooks")
os.Exit(1)
}
close(webhooksReady)
}()
setupLog.Info("starting manager")
if err := mgr.Start(mainCtx); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
}
func registerControllers(
mgr ctrl.Manager,
operatorCfg *configv1alpha1.OperatorConfiguration,
runtimeConfig *commonController.RuntimeConfig,
dockerSecretRetriever *secrets.DockerSecretIndexer,
mpiSecretReplicator *secret.SecretReplicator,
) error {
if err := (&controller.DynamoComponentDeploymentReconciler{
Client: mgr.GetClient(), Client: mgr.GetClient(),
Recorder: mgr.GetEventRecorderFor("dynamocomponentdeployment"), Recorder: mgr.GetEventRecorderFor("dynamocomponentdeployment"),
Config: operatorCfg, Config: operatorCfg,
RuntimeConfig: runtimeConfig, RuntimeConfig: runtimeConfig,
DockerSecretRetriever: dockerSecretRetriever, DockerSecretRetriever: dockerSecretRetriever,
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoComponentDeployment") return fmt.Errorf("unable to create DynamoComponentDeployment controller: %w", err)
os.Exit(1)
} }
// Create scale client for Grove resource scaling
scaleClient, err := createScalesGetter(mgr) scaleClient, err := createScalesGetter(mgr)
if err != nil { if err != nil {
setupLog.Error(err, "unable to create scale client") return fmt.Errorf("unable to create scale client: %w", err)
os.Exit(1)
} }
// Initialize RBAC manager for cross-namespace resource management
rbacManager := rbac.NewManager(mgr.GetClient()) rbacManager := rbac.NewManager(mgr.GetClient())
if err = (&controller.DynamoGraphDeploymentReconciler{ if err = (&controller.DynamoGraphDeploymentReconciler{
...@@ -535,8 +620,7 @@ func main() { ...@@ -535,8 +620,7 @@ func main() {
MPISecretReplicator: mpiSecretReplicator, MPISecretReplicator: mpiSecretReplicator,
RBACManager: rbacManager, RBACManager: rbacManager,
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeployment") return fmt.Errorf("unable to create DynamoGraphDeployment controller: %w", err)
os.Exit(1)
} }
if err = (&controller.DynamoGraphDeploymentScalingAdapterReconciler{ if err = (&controller.DynamoGraphDeploymentScalingAdapterReconciler{
...@@ -546,8 +630,7 @@ func main() { ...@@ -546,8 +630,7 @@ func main() {
Config: operatorCfg, Config: operatorCfg,
RuntimeConfig: runtimeConfig, RuntimeConfig: runtimeConfig,
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DGDScalingAdapter") return fmt.Errorf("unable to create DGDScalingAdapter controller: %w", err)
os.Exit(1)
} }
if err = (&controller.DynamoGraphDeploymentRequestReconciler{ if err = (&controller.DynamoGraphDeploymentRequestReconciler{
...@@ -558,8 +641,7 @@ func main() { ...@@ -558,8 +641,7 @@ func main() {
RuntimeConfig: runtimeConfig, RuntimeConfig: runtimeConfig,
RBACManager: rbacManager, RBACManager: rbacManager,
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeploymentRequest") return fmt.Errorf("unable to create DynamoGraphDeploymentRequest controller: %w", err)
os.Exit(1)
} }
if err = (&controller.DynamoModelReconciler{ if err = (&controller.DynamoModelReconciler{
...@@ -569,8 +651,7 @@ func main() { ...@@ -569,8 +651,7 @@ func main() {
Config: operatorCfg, Config: operatorCfg,
RuntimeConfig: runtimeConfig, RuntimeConfig: runtimeConfig,
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoModel") return fmt.Errorf("unable to create DynamoModel controller: %w", err)
os.Exit(1)
} }
if err = (&controller.CheckpointReconciler{ if err = (&controller.CheckpointReconciler{
...@@ -579,11 +660,19 @@ func main() { ...@@ -579,11 +660,19 @@ func main() {
RuntimeConfig: runtimeConfig, RuntimeConfig: runtimeConfig,
Recorder: mgr.GetEventRecorderFor("checkpoint"), Recorder: mgr.GetEventRecorderFor("checkpoint"),
}).SetupWithManager(mgr); err != nil { }).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoCheckpoint") return fmt.Errorf("unable to create DynamoCheckpoint controller: %w", err)
os.Exit(1)
} }
// Configure webhooks with lease-based namespace exclusion setupLog.Info("Controllers registered successfully")
return nil
}
func registerWebhooks(
mgr ctrl.Manager,
operatorCfg *configv1alpha1.OperatorConfiguration,
runtimeConfig *commonController.RuntimeConfig,
operatorVersion string,
) error {
isClusterWide := operatorCfg.Namespace.Restricted == "" isClusterWide := operatorCfg.Namespace.Restricted == ""
if isClusterWide { if isClusterWide {
setupLog.Info("Configuring webhooks with lease-based namespace exclusion for cluster-wide mode") setupLog.Info("Configuring webhooks with lease-based namespace exclusion for cluster-wide mode")
...@@ -594,75 +683,48 @@ func main() { ...@@ -594,75 +683,48 @@ func main() {
internalwebhook.SetExcludedNamespaces(nil) internalwebhook.SetExcludedNamespaces(nil)
} }
// Register validation webhook handlers
setupLog.Info("Registering validation webhooks") setupLog.Info("Registering validation webhooks")
dcdHandler := webhookvalidation.NewDynamoComponentDeploymentHandler() dcdHandler := webhookvalidation.NewDynamoComponentDeploymentHandler()
if err = dcdHandler.RegisterWithManager(mgr); err != nil { if err := dcdHandler.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoComponentDeployment") return fmt.Errorf("unable to register DynamoComponentDeployment webhook: %w", err)
os.Exit(1)
} }
dgdHandler := webhookvalidation.NewDynamoGraphDeploymentHandler(mgr) dgdHandler := webhookvalidation.NewDynamoGraphDeploymentHandler(mgr)
if err = dgdHandler.RegisterWithManager(mgr); err != nil { if err := dgdHandler.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoGraphDeployment") return fmt.Errorf("unable to register DynamoGraphDeployment webhook: %w", err)
os.Exit(1)
} }
dmHandler := webhookvalidation.NewDynamoModelHandler() dmHandler := webhookvalidation.NewDynamoModelHandler()
if err = dmHandler.RegisterWithManager(mgr); err != nil { if err := dmHandler.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoModel") return fmt.Errorf("unable to register DynamoModel webhook: %w", err)
os.Exit(1)
} }
dgdrHandler := webhookvalidation.NewDynamoGraphDeploymentRequestHandler( dgdrHandler := webhookvalidation.NewDynamoGraphDeploymentRequestHandler(
isClusterWide, ptr.Deref(operatorCfg.GPU.DiscoveryEnabled, true), isClusterWide, ptr.Deref(operatorCfg.GPU.DiscoveryEnabled, true),
) )
if err = dgdrHandler.RegisterWithManager(mgr); err != nil { if err := dgdrHandler.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoGraphDeploymentRequest") return fmt.Errorf("unable to register DynamoGraphDeploymentRequest webhook: %w", err)
os.Exit(1)
} }
// Register the DGDR conversion webhook using the hub version (v1beta1). if err := ctrl.NewWebhookManagedBy(mgr).
if err = ctrl.NewWebhookManagedBy(mgr).
For(&nvidiacomv1beta1.DynamoGraphDeploymentRequest{}). For(&nvidiacomv1beta1.DynamoGraphDeploymentRequest{}).
Complete(); err != nil { Complete(); err != nil {
setupLog.Error(err, "unable to register conversion webhook", "webhook", "DynamoGraphDeploymentRequest-conversion") return fmt.Errorf("unable to register DynamoGraphDeploymentRequest conversion webhook: %w", err)
os.Exit(1)
} }
setupLog.Info("Validation webhooks registered successfully")
// Register defaulting (mutating) webhook handlers
setupLog.Info("Registering defaulting webhooks") setupLog.Info("Registering defaulting webhooks")
dgdDefaulter := webhookdefaulting.NewDGDDefaulter(operatorVersion) dgdDefaulter := webhookdefaulting.NewDGDDefaulter(operatorVersion)
if err = dgdDefaulter.RegisterWithManager(mgr); err != nil { if err := dgdDefaulter.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoGraphDeployment-defaulting") return fmt.Errorf("unable to register DynamoGraphDeployment defaulting webhook: %w", err)
os.Exit(1)
} }
dgdrDefaulter := webhookdefaulting.NewDGDRDefaulter(operatorVersion) dgdrDefaulter := webhookdefaulting.NewDGDRDefaulter(operatorVersion)
if err = dgdrDefaulter.RegisterWithManager(mgr); err != nil { if err := dgdrDefaulter.RegisterWithManager(mgr); err != nil {
setupLog.Error(err, "unable to register webhook", "webhook", "DynamoGraphDeploymentRequest-defaulting") return fmt.Errorf("unable to register DynamoGraphDeploymentRequest defaulting webhook: %w", err)
os.Exit(1)
}
setupLog.Info("Defaulting webhooks registered successfully")
//+kubebuilder:scaffold:builder
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up health check")
os.Exit(1)
}
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
setupLog.Error(err, "unable to set up ready check")
os.Exit(1)
} }
setupLog.Info("starting manager") setupLog.Info("Webhooks registered successfully")
if err := mgr.Start(mainCtx); err != nil { return nil
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
} }
...@@ -12,6 +12,7 @@ require ( ...@@ -12,6 +12,7 @@ require (
github.com/imdario/mergo v0.3.16 github.com/imdario/mergo v0.3.16
github.com/onsi/ginkgo/v2 v2.27.3 github.com/onsi/ginkgo/v2 v2.27.3
github.com/onsi/gomega v1.38.3 github.com/onsi/gomega v1.38.3
github.com/open-policy-agent/cert-controller v0.14.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/prometheus/client_golang v1.23.2 github.com/prometheus/client_golang v1.23.2
github.com/stretchr/testify v1.11.1 github.com/stretchr/testify v1.11.1
...@@ -62,6 +63,7 @@ require ( ...@@ -62,6 +63,7 @@ require (
github.com/prometheus/procfs v0.17.0 // indirect github.com/prometheus/procfs v0.17.0 // indirect
github.com/spf13/pflag v1.0.10 // indirect github.com/spf13/pflag v1.0.10 // indirect
github.com/x448/float16 v0.8.4 // indirect github.com/x448/float16 v0.8.4 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.1 // indirect go.uber.org/zap v1.27.1 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect
......
...@@ -97,6 +97,10 @@ github.com/onsi/ginkgo/v2 v2.27.3 h1:ICsZJ8JoYafeXFFlFAG75a7CxMsJHwgKwtO+82SE9L8 ...@@ -97,6 +97,10 @@ github.com/onsi/ginkgo/v2 v2.27.3 h1:ICsZJ8JoYafeXFFlFAG75a7CxMsJHwgKwtO+82SE9L8
github.com/onsi/ginkgo/v2 v2.27.3/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= github.com/onsi/ginkgo/v2 v2.27.3/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo=
github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM= github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM=
github.com/onsi/gomega v1.38.3/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= github.com/onsi/gomega v1.38.3/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4=
github.com/open-policy-agent/cert-controller v0.14.0 h1:TPc19BOHOs4tARruTT5o4bzir7Ed6FF+j3EXP/nmZBs=
github.com/open-policy-agent/cert-controller v0.14.0/go.mod h1:UhE/FU54DnKo+Rt0Yf3r+oKjgy6kqSH8Vsjo+5bGrSo=
github.com/open-policy-agent/frameworks/constraint v0.0.0-20241101234656-e78c8abd754a h1:gQtOJ50XFyL2Xh3lDD9zP4KQ2PY4mZKQ9hDcWc81Sp8=
github.com/open-policy-agent/frameworks/constraint v0.0.0-20241101234656-e78c8abd754a/go.mod h1:tI7nc6H6os2UYZRvSm9Y7bq4oMoXqhwA0WfnqKpoAgc=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
...@@ -135,6 +139,8 @@ github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcY ...@@ -135,6 +139,8 @@ github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcY
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
...@@ -218,6 +224,8 @@ k8s.io/client-go v0.34.3 h1:wtYtpzy/OPNYf7WyNBTj3iUA0XaBHVqhv4Iv3tbrF5A= ...@@ -218,6 +224,8 @@ k8s.io/client-go v0.34.3 h1:wtYtpzy/OPNYf7WyNBTj3iUA0XaBHVqhv4Iv3tbrF5A=
k8s.io/client-go v0.34.3/go.mod h1:OxxeYagaP9Kdf78UrKLa3YZixMCfP6bgPwPwNBQBzpM= k8s.io/client-go v0.34.3/go.mod h1:OxxeYagaP9Kdf78UrKLa3YZixMCfP6bgPwPwNBQBzpM=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-aggregator v0.33.4 h1:TdIJKHb0/bLpby7FblXIaVEzyA1jGEjzt/n9cRvwq8U=
k8s.io/kube-aggregator v0.33.4/go.mod h1:wZuctdRvGde5bwzxkZRs0GYj2KOpCNgx8rRGVoNb62k=
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 h1:liMHz39T5dJO1aOKHLvwaCjDbf07wVh6yaUlTpunnkE= k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 h1:liMHz39T5dJO1aOKHLvwaCjDbf07wVh6yaUlTpunnkE=
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts= k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3/go.mod h1:UZ2yyWbFTpuhSbFhv24aGNOdoRdJZgsIObGBUaYVsts=
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck= k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 h1:SjGebBtkBqHFOli+05xYbK8YF1Dzkbzn+gDM4X9T4Ck=
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
package cert
import (
"context"
"fmt"
"os"
"strings"
configv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/config/v1alpha1"
"github.com/go-logr/logr"
certrotator "github.com/open-policy-agent/cert-controller/pkg/rotator"
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// Package-level names shared by CertManager and CABundleInjector.
const (
	// certificateAuthorityName is passed to the cert rotator as its CAName.
	certificateAuthorityName = "Dynamo-Webhook-CA"
	// certificateAuthorityOrganization is passed to the cert rotator as its CAOrganization.
	certificateAuthorityOrganization = "NVIDIA"
	// namespaceFile is the file getOperatorNamespace reads to learn which
	// namespace the operator is running in.
	namespaceFile = "/var/run/secrets/kubernetes.io/serviceaccount/namespace"
	// dgdrCRDName is the CRD that ensureCRDConversion patches with the
	// conversion webhook configuration.
	dgdrCRDName = "dynamographdeploymentrequests.nvidia.com"
	// partOfLabel / partOfValue are stamped on the placeholder TLS secret and
	// are also half of the selector used to find webhook configurations.
	partOfLabel = "app.kubernetes.io/part-of"
	partOfValue = "dynamo-operator"
	// operatorNamespaceLabel is the other half of that selector; its expected
	// value is the operator's own namespace (see webhookLabels).
	operatorNamespaceLabel = "nvidia.com/dynamo-operator-namespace"
)
// CertProvisioner abstracts the mechanism that adds a certificate rotator to
// the controller-runtime manager. The default implementation delegates to the
// OPA cert-controller; tests can substitute a stub.
type CertProvisioner interface {
	// AddRotator registers rotator with mgr and reports any registration
	// error. CertManager invokes it as the last step of auto-mode setup and
	// returns its result unchanged.
	AddRotator(mgr ctrl.Manager, rotator *certrotator.CertRotator) error
}
// opaCertProvisioner is the production implementation backed by the OPA
// cert-controller library. It holds no state; NewCertManager installs it as
// the provisioner of every CertManager it builds.
type opaCertProvisioner struct{}

// AddRotator forwards both arguments to certrotator.AddRotator and returns
// its error as-is.
func (opaCertProvisioner) AddRotator(mgr ctrl.Manager, rotator *certrotator.CertRotator) error {
	return certrotator.AddRotator(mgr, rotator)
}
// CertManager manages webhook TLS certificate lifecycle.
// In auto mode it uses a CertProvisioner for generation and rotation.
// In manual mode it expects externally provided certificates and signals
// readiness immediately.
type CertManager struct {
	// client looks up and, if missing, creates the placeholder TLS secret.
	client client.Client
	// cfg supplies the provision mode, cert directory, secret name and
	// service name used throughout setup.
	cfg *configv1alpha1.WebhookServer
	// namespace is the operator's namespace as returned by
	// getOperatorNamespace at construction time.
	namespace string
	// ready is what WaitReady blocks on. Setup closes it directly in manual
	// mode; in auto mode it is handed to the rotator as IsReady
	// (presumably closed by the cert-controller once certs are in place).
	ready chan struct{}
	// logger is the "cert-manager" named logger.
	logger logr.Logger
	// provisioner attaches the rotator to the manager in auto mode.
	provisioner CertProvisioner
}
// NewCertManager builds a CertManager scoped to the operator's own namespace
// and wired to the OPA-backed provisioner.
//
// cl should be a direct (non-cached) client, because Setup runs before the
// manager's cache has been started. It is only used to create the placeholder
// secret in auto mode; RBAC is the actual access boundary.
//
// An error is returned when the operator namespace cannot be determined.
func NewCertManager(cl client.Client, cfg *configv1alpha1.WebhookServer) (*CertManager, error) {
	operatorNS, nsErr := getOperatorNamespace()
	if nsErr != nil {
		return nil, fmt.Errorf("reading operator namespace: %w", nsErr)
	}

	manager := new(CertManager)
	manager.client = cl
	manager.cfg = cfg
	manager.namespace = operatorNS
	manager.ready = make(chan struct{})
	manager.logger = ctrl.Log.WithName("cert-manager")
	manager.provisioner = opaCertProvisioner{}
	return manager, nil
}
// Setup wires certificate management according to the configured provision
// mode:
//   - auto:   delegates to setupAutoProvisioning, which adds the
//     cert-controller to the manager;
//   - manual: logs the cert location and closes the ready channel right away;
//   - other:  returns an "unsupported cert provision mode" error.
func (cm *CertManager) Setup(ctx context.Context, mgr ctrl.Manager) error {
	mode := cm.cfg.CertProvisionMode

	if mode == configv1alpha1.CertProvisionModeAuto {
		return cm.setupAutoProvisioning(ctx, mgr)
	}
	if mode != configv1alpha1.CertProvisionModeManual {
		return fmt.Errorf("unsupported cert provision mode: %q", mode)
	}

	// Manual mode: nothing to provision, so waiters are released immediately.
	cm.logger.Info("Using externally provided certificates (manual mode)",
		"certDir", cm.cfg.CertDir, "secretName", cm.cfg.SecretName)
	close(cm.ready)
	return nil
}
// WaitReady blocks until the ready channel is closed, logging before and
// after the wait. In manual mode Setup closes the channel itself, so this
// returns without checking the cert directory; in auto mode the channel is
// the rotator's IsReady signal. There is no timeout or context: the call
// blocks for as long as the channel stays open.
func (cm *CertManager) WaitReady() {
	cm.logger.Info("Waiting for webhook certificates to be ready")
	<-cm.ready
	cm.logger.Info("Webhook certificates are ready")
}
// setupAutoProvisioning prepares auto-mode certificate management: it makes
// sure the TLS secret exists, builds a cert rotator whose SANs cover every
// in-cluster DNS form of the webhook service, and registers that rotator with
// the manager through the configured CertProvisioner.
//
// It returns an error when the webhook service name is not configured, when
// the placeholder secret cannot be ensured, or when the provisioner rejects
// the rotator.
func (cm *CertManager) setupAutoProvisioning(ctx context.Context, mgr ctrl.Manager) error {
	// Every SAN below is derived from the service name, and it has no default.
	// Fail before touching the cluster instead of issuing a certificate for
	// names like ".<namespace>.svc" that no client will ever dial.
	if cm.cfg.ServiceName == "" {
		return fmt.Errorf("webhook serviceName must be set when cert provision mode is %q",
			configv1alpha1.CertProvisionModeAuto)
	}

	if err := cm.createPlaceholderSecretIfNotExists(ctx); err != nil {
		return fmt.Errorf("ensuring webhook TLS secret exists: %w", err)
	}

	dnsName := fmt.Sprintf("%s.%s.svc", cm.cfg.ServiceName, cm.namespace)
	cm.logger.Info("Auto-provisioning certificates using cert-controller",
		"secretName", cm.cfg.SecretName, "dnsName", dnsName)

	rotator := &certrotator.CertRotator{
		SecretKey: types.NamespacedName{
			Namespace: cm.namespace,
			Name:      cm.cfg.SecretName,
		},
		CertDir:        cm.cfg.CertDir,
		CAName:         certificateAuthorityName,
		CAOrganization: certificateAuthorityOrganization,
		// The same channel WaitReady blocks on.
		IsReady: cm.ready,
		DNSName: dnsName,
		// Short, namespace-qualified and fully-qualified forms of the service.
		ExtraDNSNames: []string{
			cm.cfg.ServiceName,
			fmt.Sprintf("%s.%s", cm.cfg.ServiceName, cm.namespace),
			fmt.Sprintf("%s.%s.svc.cluster.local", cm.cfg.ServiceName, cm.namespace),
		},
		// NOTE(review): exact semantics of the two flags below are defined by
		// the cert-controller library; confirm against its documentation.
		EnableReadinessCheck:   true,
		RestartOnSecretRefresh: true,
	}

	return cm.provisioner.AddRotator(mgr, rotator)
}
// createPlaceholderSecretIfNotExists makes sure the webhook TLS secret is
// present. The OPA cert-controller can only Update existing secrets, not
// Create them, so an empty TLS-typed secret is created when none is found.
// An existing secret is never modified, and losing a creation race
// (AlreadyExists) is treated as success.
func (cm *CertManager) createPlaceholderSecretIfNotExists(ctx context.Context) error {
	key := types.NamespacedName{Namespace: cm.namespace, Name: cm.cfg.SecretName}

	lookupErr := cm.client.Get(ctx, key, &corev1.Secret{})
	if lookupErr == nil {
		// Already there: leave it untouched.
		return nil
	}
	if !apierrors.IsNotFound(lookupErr) {
		return lookupErr
	}

	placeholder := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{
			Namespace: key.Namespace,
			Name:      key.Name,
			Labels: map[string]string{
				"app.kubernetes.io/managed-by": "dynamo-operator",
				"app.kubernetes.io/component":  "webhook",
				partOfLabel:                    partOfValue,
			},
		},
		Type: corev1.SecretTypeTLS,
		// Keys are present but empty; real material is written later.
		Data: map[string][]byte{
			"tls.crt": {},
			"tls.key": {},
			"ca.crt":  {},
		},
	}

	createErr := cm.client.Create(ctx, placeholder)
	switch {
	case createErr == nil:
		cm.logger.Info("Created webhook TLS secret", "namespace", cm.namespace, "name", cm.cfg.SecretName)
		return nil
	case apierrors.IsAlreadyExists(createErr):
		return nil
	default:
		return fmt.Errorf("creating webhook TLS secret: %w", createErr)
	}
}
// CABundleInjector discovers webhook configurations owned by this operator
// instance and patches them with the CA bundle from the cert secret.
type CABundleInjector struct {
	// client reads the cert secret and lists/patches webhook configurations
	// and the DGDR CRD.
	client client.Client
	// cfg is the full operator configuration; only Server.Webhook.SecretName
	// and Server.Webhook.ServiceName are read by this type.
	cfg *configv1alpha1.OperatorConfiguration
	// namespace is the operator's namespace: it locates the cert secret, is
	// the expected value of the operator-namespace label, and is the
	// namespace of the conversion webhook service reference.
	namespace string
	// logger is the "ca-bundle-injector" named logger.
	logger logr.Logger
}
// NewCABundleInjector builds a CABundleInjector scoped to the operator's own
// namespace. cl should be the manager's cached client (available after
// mgr.Start). An error is returned when the operator namespace cannot be
// determined.
func NewCABundleInjector(cl client.Client, cfg *configv1alpha1.OperatorConfiguration) (*CABundleInjector, error) {
	operatorNS, nsErr := getOperatorNamespace()
	if nsErr != nil {
		return nil, fmt.Errorf("reading operator namespace: %w", nsErr)
	}

	injector := new(CABundleInjector)
	injector.client = cl
	injector.cfg = cfg
	injector.namespace = operatorNS
	injector.logger = ctrl.Log.WithName("ca-bundle-injector")
	return injector, nil
}
// InjectAll loads the CA bundle from the cert secret and pushes it, in order,
// into the validating webhook configurations, the mutating webhook
// configurations (both scoped by the operator-namespace label), and the DGDR
// CRD conversion webhook. It stops at, and returns, the first failure.
func (i *CABundleInjector) InjectAll(ctx context.Context) error {
	bundle, readErr := i.readCABundle(ctx)
	if readErr != nil {
		return fmt.Errorf("reading CA bundle from secret %s/%s: %w", i.namespace, i.cfg.Server.Webhook.SecretName, readErr)
	}

	// Ordered list of injection targets; each receives the same bundle.
	steps := []func(context.Context, []byte) error{
		i.injectIntoValidatingWebhooks,
		i.injectIntoMutatingWebhooks,
		i.ensureCRDConversion,
	}
	for _, step := range steps {
		if stepErr := step(ctx, bundle); stepErr != nil {
			return stepErr
		}
	}

	i.logger.Info("CA bundle injected into all webhook configurations")
	return nil
}
// readCABundle fetches the webhook cert secret from the operator namespace
// and returns the bytes stored under "ca.crt". A lookup failure is returned
// unchanged; a missing or zero-length "ca.crt" entry is reported as an error.
func (i *CABundleInjector) readCABundle(ctx context.Context) ([]byte, error) {
	secretName := i.cfg.Server.Webhook.SecretName

	var tlsSecret corev1.Secret
	lookupErr := i.client.Get(ctx, types.NamespacedName{Namespace: i.namespace, Name: secretName}, &tlsSecret)
	if lookupErr != nil {
		return nil, lookupErr
	}

	// Indexing a missing key yields a nil slice, so one length check covers
	// both the "absent" and the "empty" case.
	if bundle := tlsSecret.Data["ca.crt"]; len(bundle) > 0 {
		return bundle, nil
	}
	return nil, fmt.Errorf("ca.crt not found or empty in secret %s/%s", i.namespace, secretName)
}
// webhookLabels returns the label selector that identifies webhook
// configurations belonging to this operator instance: the part-of label plus
// the operator-namespace label set to this injector's namespace.
func (i *CABundleInjector) webhookLabels() client.MatchingLabels {
	selector := make(client.MatchingLabels, 2)
	selector[partOfLabel] = partOfValue
	selector[operatorNamespaceLabel] = i.namespace
	return selector
}
// injectIntoValidatingWebhooks sets caBundle on every webhook entry of each
// ValidatingWebhookConfiguration matched by webhookLabels and sends the
// change as a merge patch against the pre-modification copy. It returns on
// the first list or patch failure.
func (i *CABundleInjector) injectIntoValidatingWebhooks(ctx context.Context, caBundle []byte) error {
	var configs admissionregistrationv1.ValidatingWebhookConfigurationList
	if listErr := i.client.List(ctx, &configs, i.webhookLabels()); listErr != nil {
		return fmt.Errorf("listing validating webhook configurations: %w", listErr)
	}

	for n := range configs.Items {
		// Work on the slice element itself so the mutation is what gets patched.
		target := &configs.Items[n]
		base := client.MergeFrom(target.DeepCopy())

		for h := range target.Webhooks {
			target.Webhooks[h].ClientConfig.CABundle = caBundle
		}

		if patchErr := i.client.Patch(ctx, target, base); patchErr != nil {
			return fmt.Errorf("patching validating webhook config %s: %w", target.Name, patchErr)
		}
		i.logger.Info("Injected CA bundle into ValidatingWebhookConfiguration", "name", target.Name)
	}
	return nil
}
// injectIntoMutatingWebhooks sets caBundle on every webhook entry of each
// MutatingWebhookConfiguration matched by webhookLabels and sends the change
// as a merge patch against the pre-modification copy. It returns on the
// first list or patch failure.
func (i *CABundleInjector) injectIntoMutatingWebhooks(ctx context.Context, caBundle []byte) error {
	var configs admissionregistrationv1.MutatingWebhookConfigurationList
	if listErr := i.client.List(ctx, &configs, i.webhookLabels()); listErr != nil {
		return fmt.Errorf("listing mutating webhook configurations: %w", listErr)
	}

	for n := range configs.Items {
		// Work on the slice element itself so the mutation is what gets patched.
		target := &configs.Items[n]
		base := client.MergeFrom(target.DeepCopy())

		for h := range target.Webhooks {
			target.Webhooks[h].ClientConfig.CABundle = caBundle
		}

		if patchErr := i.client.Patch(ctx, target, base); patchErr != nil {
			return fmt.Errorf("patching mutating webhook config %s: %w", target.Name, patchErr)
		}
		i.logger.Info("Injected CA bundle into MutatingWebhookConfiguration", "name", target.Name)
	}
	return nil
}
// ensureCRDConversion points the DGDR CRD's conversion settings at this
// operator's webhook service.
//
// The CRD named dgdrCRDName is fetched; if it does not exist the function logs
// and returns nil. Otherwise spec.conversion is replaced with a Webhook
// strategy whose client config references the configured service name in the
// operator namespace at path "/convert", carries caBundle, and accepts "v1"
// conversion reviews. The change is sent as a merge patch against the fetched
// object.
func (i *CABundleInjector) ensureCRDConversion(ctx context.Context, caBundle []byte) error {
	crd := &apiextensionsv1.CustomResourceDefinition{}
	switch err := i.client.Get(ctx, types.NamespacedName{Name: dgdrCRDName}, crd); {
	case err == nil:
		// CRD found; continue below.
	case apierrors.IsNotFound(err):
		i.logger.Info("DGDR CRD not found, skipping conversion webhook setup")
		return nil
	default:
		return fmt.Errorf("getting CRD %s: %w", dgdrCRDName, err)
	}

	// Snapshot the fetched state before mutating so the patch only carries the diff.
	base := client.MergeFrom(crd.DeepCopy())

	convertPath := "/convert"
	service := &apiextensionsv1.ServiceReference{
		Namespace: i.namespace,
		Name:      i.cfg.Server.Webhook.ServiceName,
		Path:      &convertPath,
	}
	webhook := &apiextensionsv1.WebhookConversion{
		ConversionReviewVersions: []string{"v1"},
		ClientConfig: &apiextensionsv1.WebhookClientConfig{
			CABundle: caBundle,
			Service:  service,
		},
	}
	crd.Spec.Conversion = &apiextensionsv1.CustomResourceConversion{
		Strategy: apiextensionsv1.WebhookConverter,
		Webhook:  webhook,
	}

	if err := i.client.Patch(ctx, crd, base); err != nil {
		return fmt.Errorf("patching CRD %s conversion config: %w", dgdrCRDName, err)
	}
	i.logger.Info("Configured CRD conversion webhook", "crd", dgdrCRDName)
	return nil
}
// getOperatorNamespace reads namespaceFile and returns its content with
// surrounding whitespace trimmed. It fails when the file cannot be read or
// when the trimmed content is empty.
func getOperatorNamespace() (string, error) {
	raw, readErr := os.ReadFile(namespaceFile)
	if readErr != nil {
		return "", fmt.Errorf("reading namespace from %s: %w", namespaceFile, readErr)
	}
	if namespace := strings.TrimSpace(string(raw)); namespace != "" {
		return namespace, nil
	}
	return "", fmt.Errorf("operator namespace is empty")
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
package cert
import (
"context"
"fmt"
"reflect"
"testing"
"time"
configv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/config/v1alpha1"
"github.com/go-logr/logr"
certrotator "github.com/open-policy-agent/cert-controller/pkg/rotator"
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)
// fakeCertProvisioner is a test double for the rotator-registration step. It
// records the rotator handed to AddRotator and can optionally signal
// readiness by closing the rotator's IsReady channel.
type fakeCertProvisioner struct {
	// called is set to true once AddRotator has been invoked.
	called bool
	// capturedArgs holds the rotator passed to the most recent AddRotator call.
	capturedArgs *certrotator.CertRotator
	// simulateReady makes AddRotator close rotator.IsReady when it is non-nil.
	simulateReady bool
	// err is the value AddRotator returns.
	err error
}

// AddRotator records the call and its rotator argument, closes the rotator's
// IsReady channel when readiness simulation is enabled, and returns the
// configured error. The manager argument is ignored.
func (f *fakeCertProvisioner) AddRotator(_ ctrl.Manager, rotator *certrotator.CertRotator) error {
	f.called, f.capturedArgs = true, rotator
	if shouldSignal := f.simulateReady && rotator.IsReady != nil; shouldSignal {
		close(rotator.IsReady)
	}
	return f.err
}
// Fixture values shared by the tests in this file.
const (
	// testSecretName is the webhook certificate secret name used in test configs.
	testSecretName = "webhook-cert"
	// testServiceName is the webhook service name used in test configs.
	testServiceName = "my-operator-webhook-service"
	// testNamespace is the namespace assigned to managers and injectors built by the test helpers.
	testNamespace = "test-ns"
)
// newScheme returns a fresh runtime.Scheme with the core, admissionregistration
// and apiextensions v1 types registered. Registration errors are discarded.
func newScheme() *runtime.Scheme {
	scheme := runtime.NewScheme()
	for _, register := range []func(*runtime.Scheme) error{
		corev1.AddToScheme,
		admissionregistrationv1.AddToScheme,
		apiextensionsv1.AddToScheme,
	} {
		_ = register(scheme)
	}
	return scheme
}
// newTestCertManager builds a CertManager for tests: the client comes from the
// supplied fake builder, the namespace is testNamespace, the ready channel is
// freshly created and open, and logging is discarded.
func newTestCertManager(cl *fake.ClientBuilder, cfg *configv1alpha1.WebhookServer) *CertManager {
	manager := &CertManager{
		namespace: testNamespace,
		cfg:       cfg,
		logger:    logr.Discard(),
	}
	manager.client = cl.Build()
	manager.ready = make(chan struct{})
	return manager
}
// newTestInjector builds a CABundleInjector for tests: the client comes from
// the supplied fake builder, the namespace is testNamespace, and logging is
// discarded.
func newTestInjector(cl *fake.ClientBuilder, cfg *configv1alpha1.OperatorConfiguration) *CABundleInjector {
	injector := &CABundleInjector{
		namespace: testNamespace,
		cfg:       cfg,
		logger:    logr.Discard(),
	}
	injector.client = cl.Build()
	return injector
}
func TestCreatePlaceholderSecretIfNotExists_CreatesWhenMissing(t *testing.T) {
cfg := &configv1alpha1.WebhookServer{SecretName: testSecretName}
cm := newTestCertManager(fake.NewClientBuilder().WithScheme(newScheme()), cfg)
ctx := context.Background()
if err := cm.createPlaceholderSecretIfNotExists(ctx); err != nil {
t.Fatalf("unexpected error: %v", err)
}
secret := &corev1.Secret{}
if err := cm.client.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: testSecretName}, secret); err != nil {
t.Fatalf("secret should exist: %v", err)
}
if secret.Type != corev1.SecretTypeTLS {
t.Errorf("expected TLS secret type, got %s", secret.Type)
}
if secret.Labels[partOfLabel] != partOfValue {
t.Errorf("expected label %s=%s, got %s", partOfLabel, partOfValue, secret.Labels[partOfLabel])
}
if _, ok := secret.Data["ca.crt"]; !ok {
t.Error("expected ca.crt key in secret data")
}
}
// TestCreatePlaceholderSecretIfNotExists_NoopWhenExists seeds the fake cluster
// with a populated TLS secret and checks that its tls.crt value is unchanged
// after createPlaceholderSecretIfNotExists runs.
func TestCreatePlaceholderSecretIfNotExists_NoopWhenExists(t *testing.T) {
	preexisting := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{Name: testSecretName, Namespace: testNamespace},
		Type:       corev1.SecretTypeTLS,
		Data: map[string][]byte{
			"ca.crt":  []byte("existing-ca"),
			"tls.crt": []byte("existing-cert"),
			"tls.key": []byte("existing-key"),
		},
	}
	builder := fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(preexisting)
	manager := newTestCertManager(builder, &configv1alpha1.WebhookServer{SecretName: testSecretName})
	ctx := context.Background()

	if err := manager.createPlaceholderSecretIfNotExists(ctx); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	var stored corev1.Secret
	key := types.NamespacedName{Namespace: testNamespace, Name: testSecretName}
	if err := manager.client.Get(ctx, key, &stored); err != nil {
		t.Fatalf("secret should exist: %v", err)
	}
	if got := string(stored.Data["tls.crt"]); got != "existing-cert" {
		t.Error("existing secret data should not be overwritten")
	}
}
// TestCertManager_ManualModeClosesChannelImmediately runs Setup in manual
// provisioning mode (no provisioner, nil manager) and requires the ready
// channel to become readable within one second.
func TestCertManager_ManualModeClosesChannelImmediately(t *testing.T) {
	manualCfg := &configv1alpha1.WebhookServer{
		SecretName:        testSecretName,
		CertDir:           "/tmp/certs",
		CertProvisionMode: configv1alpha1.CertProvisionModeManual,
	}
	manager := newTestCertManager(fake.NewClientBuilder().WithScheme(newScheme()), manualCfg)

	if err := manager.Setup(context.Background(), nil); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	deadline := time.NewTimer(time.Second)
	defer deadline.Stop()
	select {
	case <-manager.ready:
		// Readiness was signalled; nothing else to check.
	case <-deadline.C:
		t.Fatal("ready channel should be closed immediately in manual mode")
	}
}
func TestCertManager_AutoModeConfiguresRotator(t *testing.T) {
cfg := &configv1alpha1.WebhookServer{
CertProvisionMode: configv1alpha1.CertProvisionModeAuto,
CertDir: "/tmp/certs",
SecretName: testSecretName,
ServiceName: testServiceName,
}
prov := &fakeCertProvisioner{simulateReady: true}
cm := newTestCertManager(fake.NewClientBuilder().WithScheme(newScheme()), cfg)
cm.provisioner = prov
if err := cm.Setup(context.Background(), nil); err != nil {
t.Fatalf("unexpected error: %v", err)
}
if !prov.called {
t.Fatal("expected provisioner.AddRotator to be called")
}
expected := &certrotator.CertRotator{
SecretKey: types.NamespacedName{
Namespace: testNamespace,
Name: testSecretName,
},
CertDir: "/tmp/certs",
CAName: certificateAuthorityName,
CAOrganization: certificateAuthorityOrganization,
IsReady: cm.ready,
DNSName: fmt.Sprintf("%s.%s.svc", testServiceName, testNamespace),
ExtraDNSNames: []string{
testServiceName,
fmt.Sprintf("%s.%s", testServiceName, testNamespace),
fmt.Sprintf("%s.%s.svc.cluster.local", testServiceName, testNamespace),
},
EnableReadinessCheck: true,
RestartOnSecretRefresh: true,
}
if !reflect.DeepEqual(prov.capturedArgs, expected) {
t.Errorf("rotator config mismatch\ngot: %+v\nwant: %+v", prov.capturedArgs, expected)
}
// Verify placeholder secret was created
secret := &corev1.Secret{}
if err := cm.client.Get(context.Background(), types.NamespacedName{Namespace: testNamespace, Name: testSecretName}, secret); err != nil {
t.Fatalf("placeholder secret should exist: %v", err)
}
}
func TestCertManager_AutoModeProvisionerError(t *testing.T) {
cfg := &configv1alpha1.WebhookServer{
CertProvisionMode: configv1alpha1.CertProvisionModeAuto,
SecretName: testSecretName,
ServiceName: testServiceName,
}
prov := &fakeCertProvisioner{err: fmt.Errorf("rotator setup failed")}
cm := newTestCertManager(fake.NewClientBuilder().WithScheme(newScheme()), cfg)
cm.provisioner = prov
err := cm.Setup(context.Background(), nil)
if err == nil {
t.Fatal("expected error from provisioner")
}
if !prov.called {
t.Fatal("expected provisioner.AddRotator to be called")
}
}
// TestInjectIntoValidatingWebhooks seeds a ValidatingWebhookConfiguration whose
// labels match the injector's selector and checks that its webhook receives
// the CA bundle.
func TestInjectIntoValidatingWebhooks(t *testing.T) {
	const configName = "test-validating"
	hook := admissionregistrationv1.ValidatingWebhook{
		Name:                    "test.webhook.io",
		ClientConfig:            admissionregistrationv1.WebhookClientConfig{},
		SideEffects:             ptr.To(admissionregistrationv1.SideEffectClassNone),
		AdmissionReviewVersions: []string{"v1"},
	}
	seeded := &admissionregistrationv1.ValidatingWebhookConfiguration{
		ObjectMeta: metav1.ObjectMeta{
			Name: configName,
			Labels: map[string]string{
				partOfLabel:            partOfValue,
				operatorNamespaceLabel: testNamespace,
			},
		},
		Webhooks: []admissionregistrationv1.ValidatingWebhook{hook},
	}

	opCfg := &configv1alpha1.OperatorConfiguration{}
	opCfg.Server.Webhook.SecretName = testSecretName
	injector := newTestInjector(fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(seeded), opCfg)

	ctx := context.Background()
	if err := injector.injectIntoValidatingWebhooks(ctx, []byte("test-ca-data")); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	var stored admissionregistrationv1.ValidatingWebhookConfiguration
	if err := injector.client.Get(ctx, types.NamespacedName{Name: configName}, &stored); err != nil {
		t.Fatalf("failed to get webhook config: %v", err)
	}
	if got := string(stored.Webhooks[0].ClientConfig.CABundle); got != "test-ca-data" {
		t.Errorf("expected CA bundle to be injected, got %q", got)
	}
}
// TestInjectIntoValidatingWebhooks_SkipsNonMatchingLabels seeds a configuration
// labelled as part of a different operator and checks that the injector leaves
// its CA bundle unset.
func TestInjectIntoValidatingWebhooks_SkipsNonMatchingLabels(t *testing.T) {
	const configName = "other-validating"
	foreign := &admissionregistrationv1.ValidatingWebhookConfiguration{
		ObjectMeta: metav1.ObjectMeta{
			Name:   configName,
			Labels: map[string]string{"app.kubernetes.io/part-of": "other-operator"},
		},
		Webhooks: []admissionregistrationv1.ValidatingWebhook{{
			Name:                    "other.webhook.io",
			ClientConfig:            admissionregistrationv1.WebhookClientConfig{},
			SideEffects:             ptr.To(admissionregistrationv1.SideEffectClassNone),
			AdmissionReviewVersions: []string{"v1"},
		}},
	}

	fakeClient := fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(foreign).Build()
	injector := &CABundleInjector{
		namespace: "my-ns",
		client:    fakeClient,
		cfg:       &configv1alpha1.OperatorConfiguration{},
		logger:    logr.Discard(),
	}

	ctx := context.Background()
	if err := injector.injectIntoValidatingWebhooks(ctx, []byte("test-ca")); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	var stored admissionregistrationv1.ValidatingWebhookConfiguration
	if err := injector.client.Get(ctx, types.NamespacedName{Name: configName}, &stored); err != nil {
		t.Fatalf("failed to get webhook config: %v", err)
	}
	if bundle := stored.Webhooks[0].ClientConfig.CABundle; bundle != nil {
		t.Error("non-matching webhook config should not be patched")
	}
}
// TestInjectIntoValidatingWebhooks_SkipsDifferentNamespace seeds a
// configuration with the right part-of label but an operator-namespace label
// of "other-ns", and checks that an injector running in "my-ns" leaves its CA
// bundle unset.
func TestInjectIntoValidatingWebhooks_SkipsDifferentNamespace(t *testing.T) {
	const configName = "other-ns-validating"
	otherInstall := &admissionregistrationv1.ValidatingWebhookConfiguration{
		ObjectMeta: metav1.ObjectMeta{
			Name: configName,
			Labels: map[string]string{
				partOfLabel:            partOfValue,
				operatorNamespaceLabel: "other-ns",
			},
		},
		Webhooks: []admissionregistrationv1.ValidatingWebhook{{
			Name:                    "test.webhook.io",
			ClientConfig:            admissionregistrationv1.WebhookClientConfig{},
			SideEffects:             ptr.To(admissionregistrationv1.SideEffectClassNone),
			AdmissionReviewVersions: []string{"v1"},
		}},
	}

	fakeClient := fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(otherInstall).Build()
	injector := &CABundleInjector{
		namespace: "my-ns",
		client:    fakeClient,
		cfg:       &configv1alpha1.OperatorConfiguration{},
		logger:    logr.Discard(),
	}

	ctx := context.Background()
	if err := injector.injectIntoValidatingWebhooks(ctx, []byte("test-ca")); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	var stored admissionregistrationv1.ValidatingWebhookConfiguration
	if err := injector.client.Get(ctx, types.NamespacedName{Name: configName}, &stored); err != nil {
		t.Fatalf("failed to get webhook config: %v", err)
	}
	if bundle := stored.Webhooks[0].ClientConfig.CABundle; bundle != nil {
		t.Error("webhook config from different operator namespace should not be patched")
	}
}
// TestInjectIntoMutatingWebhooks seeds a MutatingWebhookConfiguration whose
// labels match the injector's selector and checks that its webhook receives
// the CA bundle.
func TestInjectIntoMutatingWebhooks(t *testing.T) {
	const configName = "test-mutating"
	hook := admissionregistrationv1.MutatingWebhook{
		Name:                    "mutate.webhook.io",
		ClientConfig:            admissionregistrationv1.WebhookClientConfig{},
		SideEffects:             ptr.To(admissionregistrationv1.SideEffectClassNone),
		AdmissionReviewVersions: []string{"v1"},
	}
	seeded := &admissionregistrationv1.MutatingWebhookConfiguration{
		ObjectMeta: metav1.ObjectMeta{
			Name: configName,
			Labels: map[string]string{
				partOfLabel:            partOfValue,
				operatorNamespaceLabel: testNamespace,
			},
		},
		Webhooks: []admissionregistrationv1.MutatingWebhook{hook},
	}

	opCfg := &configv1alpha1.OperatorConfiguration{}
	opCfg.Server.Webhook.SecretName = testSecretName
	injector := newTestInjector(fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(seeded), opCfg)

	ctx := context.Background()
	if err := injector.injectIntoMutatingWebhooks(ctx, []byte("test-ca-data")); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	var stored admissionregistrationv1.MutatingWebhookConfiguration
	if err := injector.client.Get(ctx, types.NamespacedName{Name: configName}, &stored); err != nil {
		t.Fatalf("failed to get webhook config: %v", err)
	}
	if got := string(stored.Webhooks[0].ClientConfig.CABundle); got != "test-ca-data" {
		t.Errorf("expected CA bundle to be injected, got %q", got)
	}
}
// TestEnsureCRDConversion seeds the DGDR CRD without conversion settings and
// checks that ensureCRDConversion configures a Webhook conversion strategy
// whose client config references the configured service (name, operator
// namespace, "/convert" path), carries the CA bundle, and accepts "v1"
// conversion reviews.
//
// Intermediate pointers are checked before being dereferenced so that a
// regression is reported as a test failure rather than a nil-pointer panic.
func TestEnsureCRDConversion(t *testing.T) {
	crd := &apiextensionsv1.CustomResourceDefinition{
		ObjectMeta: metav1.ObjectMeta{
			Name: dgdrCRDName,
		},
		Spec: apiextensionsv1.CustomResourceDefinitionSpec{
			Group: "nvidia.com",
			Names: apiextensionsv1.CustomResourceDefinitionNames{
				Plural:   "dynamographdeploymentrequests",
				Singular: "dynamographdeploymentrequest",
				Kind:     "DynamoGraphDeploymentRequest",
			},
			Scope: apiextensionsv1.NamespaceScoped,
			Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
				{Name: "v1beta1", Served: true, Storage: true},
			},
		},
	}
	cfg := &configv1alpha1.OperatorConfiguration{}
	cfg.Server.Webhook.ServiceName = testServiceName
	injector := newTestInjector(fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(crd), cfg)
	ctx := context.Background()
	if err := injector.ensureCRDConversion(ctx, []byte("test-ca")); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	updated := &apiextensionsv1.CustomResourceDefinition{}
	if err := injector.client.Get(ctx, types.NamespacedName{Name: dgdrCRDName}, updated); err != nil {
		t.Fatalf("failed to get CRD: %v", err)
	}

	conversion := updated.Spec.Conversion
	if conversion == nil {
		t.Fatal("expected conversion config to be set")
	}
	if conversion.Strategy != apiextensionsv1.WebhookConverter {
		t.Errorf("expected Webhook strategy, got %s", conversion.Strategy)
	}
	if conversion.Webhook == nil || conversion.Webhook.ClientConfig == nil || conversion.Webhook.ClientConfig.Service == nil {
		t.Fatal("expected conversion webhook client config with a service reference to be set")
	}

	clientConfig := conversion.Webhook.ClientConfig
	service := clientConfig.Service
	if service.Name != testServiceName {
		t.Errorf("expected service name %s, got %s", testServiceName, service.Name)
	}
	if service.Namespace != testNamespace {
		t.Errorf("expected service namespace %s, got %s", testNamespace, service.Namespace)
	}
	if service.Path == nil || *service.Path != "/convert" {
		t.Errorf("expected service path /convert, got %v", service.Path)
	}
	if string(clientConfig.CABundle) != "test-ca" {
		t.Errorf("expected CA bundle, got %q", string(clientConfig.CABundle))
	}
	if want := []string{"v1"}; !reflect.DeepEqual(conversion.Webhook.ConversionReviewVersions, want) {
		t.Errorf("expected conversion review versions %v, got %v", want, conversion.Webhook.ConversionReviewVersions)
	}
}
// TestEnsureCRDConversion_SkipsWhenCRDNotFound runs ensureCRDConversion against
// an empty fake cluster and expects it to return nil.
func TestEnsureCRDConversion_SkipsWhenCRDNotFound(t *testing.T) {
	opCfg := &configv1alpha1.OperatorConfiguration{}
	opCfg.Server.Webhook.ServiceName = testServiceName
	emptyCluster := fake.NewClientBuilder().WithScheme(newScheme())
	injector := newTestInjector(emptyCluster, opCfg)

	err := injector.ensureCRDConversion(context.Background(), []byte("test-ca"))
	if err != nil {
		t.Fatalf("expected no error when CRD not found, got: %v", err)
	}
}
// TestReadCABundle seeds a secret containing tls.crt, tls.key and ca.crt and
// checks that readCABundle returns the ca.crt value.
func TestReadCABundle(t *testing.T) {
	seeded := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{Name: testSecretName, Namespace: testNamespace},
		Data: map[string][]byte{
			"ca.crt":  []byte("ca-data"),
			"tls.crt": []byte("cert-data"),
			"tls.key": []byte("key-data"),
		},
	}
	opCfg := &configv1alpha1.OperatorConfiguration{}
	opCfg.Server.Webhook.SecretName = testSecretName
	injector := newTestInjector(fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(seeded), opCfg)

	bundle, err := injector.readCABundle(context.Background())
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := string(bundle); got != "ca-data" {
		t.Errorf("expected ca-data, got %q", got)
	}
}
// TestReadCABundle_ErrorOnMissingCA seeds a secret that has tls.crt and tls.key
// but no ca.crt and expects readCABundle to return an error.
func TestReadCABundle_ErrorOnMissingCA(t *testing.T) {
	withoutCA := &corev1.Secret{
		ObjectMeta: metav1.ObjectMeta{Name: testSecretName, Namespace: testNamespace},
		Data: map[string][]byte{
			"tls.key": []byte("key-data"),
			"tls.crt": []byte("cert-data"),
		},
	}
	opCfg := &configv1alpha1.OperatorConfiguration{}
	opCfg.Server.Webhook.SecretName = testSecretName
	injector := newTestInjector(fake.NewClientBuilder().WithScheme(newScheme()).WithObjects(withoutCA), opCfg)

	_, err := injector.readCABundle(context.Background())
	if err == nil {
		t.Fatal("expected error when ca.crt is missing")
	}
}
...@@ -1618,6 +1618,23 @@ _Appears in:_ ...@@ -1618,6 +1618,23 @@ _Appears in:_
#### CertProvisionMode
_Underlying type:_ _string_
CertProvisionMode controls how webhook TLS certificates are managed.
_Appears in:_
- [WebhookServer](#webhookserver)
| Field | Description |
| --- | --- |
| `auto` | CertProvisionModeAuto uses the built-in cert-controller to generate and rotate certificates.<br /> |
| `manual` | CertProvisionModeManual expects certificates to be provided externally (e.g., cert-manager, admin).<br /> |
#### CheckpointConfiguration
...@@ -2081,6 +2098,9 @@ _Appears in:_ ...@@ -2081,6 +2098,9 @@ _Appears in:_
| `port` _integer_ | Port is the port the server listens on | | |
| `host` _string_ | Host is the address the webhook server binds to | | |
| `certDir` _string_ | CertDir is the directory containing TLS certificates | | |
| `certProvisionMode` _[CertProvisionMode](#certprovisionmode)_ | CertProvisionMode controls certificate management: "auto" (built-in cert-controller) or "manual" (external) | auto | |
| `secretName` _string_ | SecretName is the name of the Kubernetes Secret holding webhook TLS certificates | webhook-server-cert | |
| `serviceName` _string_ | ServiceName is the name of the Kubernetes Service fronting the webhook server.<br />Used to generate certificate SANs. Set by the Helm chart. | | |
# Operator Default Values Injection
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment