"vllm/reasoning/abs_reasoning_parsers.py" did not exist on "cfaa6008e666d4e9bb5131ece68f8609b6f94ee4"
Unverified Commit 7a341f86 authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

feat: simplify k8s deployment (#1708)

parent 5505507b
......@@ -19,6 +19,7 @@ kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.4
helm.sh/resource-policy: keep
name: dynamocomponentdeployments.nvidia.com
spec:
group: nvidia.com
......@@ -391,6 +392,8 @@ spec:
minReplicas:
type: integer
type: object
componentType:
type: string
dynamoComponent:
type: string
dynamoNamespace:
......@@ -2281,8 +2284,7 @@ spec:
x-kubernetes-list-type: map
workingDir:
type: string
required:
- name
required: []
type: object
nodeSelector:
additionalProperties:
......@@ -2619,9 +2621,6 @@ spec:
type: object
serviceName:
type: string
required:
- dynamoComponent
- dynamoTag
type: object
status:
properties:
......
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.4
name: dynamocomponents.nvidia.com
spec:
group: nvidia.com
names:
kind: DynamoComponent
listKind: DynamoComponentList
plural: dynamocomponents
shortNames:
- dc
singular: dynamocomponent
scope: Namespaced
versions:
- additionalPrinterColumns:
- description: Dynamo component
jsonPath: .spec.dynamoComponent
name: DynamoComponent
type: string
- description: Image Exists
jsonPath: .status.conditions[?(@.type=='ImageExists')].status
name: Image-Exists
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v1alpha1
schema:
openAPIV3Schema:
properties:
apiVersion:
type: string
kind:
type: string
metadata:
type: object
spec:
properties:
buildArgs:
items:
type: string
type: array
dockerConfigJsonSecretName:
type: string
downloadUrl:
type: string
downloaderContainerEnvFrom:
items:
properties:
configMapRef:
properties:
name:
default: ""
type: string
optional:
type: boolean
type: object
x-kubernetes-map-type: atomic
prefix:
type: string
secretRef:
properties:
name:
default: ""
type: string
optional:
type: boolean
type: object
x-kubernetes-map-type: atomic
type: object
type: array
dynamoComponent:
type: string
image:
type: string
imageBuildTimeout:
format: int64
type: integer
imageBuilderContainerResources:
properties:
claims:
items:
properties:
name:
type: string
request:
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
imageBuilderExtraContainerEnv:
items:
properties:
name:
type: string
value:
type: string
valueFrom:
properties:
configMapKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
properties:
apiVersion:
type: string
fieldPath:
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
properties:
containerName:
type: string
divisor:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
imageBuilderExtraPodMetadata:
properties:
annotations:
additionalProperties:
type: string
type: object
labels:
additionalProperties:
type: string
type: object
type: object
imageBuilderExtraPodSpec:
properties:
affinity:
properties:
nodeAffinity:
properties:
preferredDuringSchedulingIgnoredDuringExecution:
items:
properties:
preference:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchFields:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
weight:
format: int32
type: integer
required:
- preference
- weight
type: object
type: array
x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
properties:
nodeSelectorTerms:
items:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchFields:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
type: object
x-kubernetes-map-type: atomic
type: array
x-kubernetes-list-type: atomic
required:
- nodeSelectorTerms
type: object
x-kubernetes-map-type: atomic
type: object
podAffinity:
properties:
preferredDuringSchedulingIgnoredDuringExecution:
items:
properties:
podAffinityTerm:
properties:
labelSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
mismatchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
namespaceSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
namespaces:
items:
type: string
type: array
x-kubernetes-list-type: atomic
topologyKey:
type: string
required:
- topologyKey
type: object
weight:
format: int32
type: integer
required:
- podAffinityTerm
- weight
type: object
type: array
x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
items:
properties:
labelSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
mismatchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
namespaceSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
namespaces:
items:
type: string
type: array
x-kubernetes-list-type: atomic
topologyKey:
type: string
required:
- topologyKey
type: object
type: array
x-kubernetes-list-type: atomic
type: object
podAntiAffinity:
properties:
preferredDuringSchedulingIgnoredDuringExecution:
items:
properties:
podAffinityTerm:
properties:
labelSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
mismatchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
namespaceSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
namespaces:
items:
type: string
type: array
x-kubernetes-list-type: atomic
topologyKey:
type: string
required:
- topologyKey
type: object
weight:
format: int32
type: integer
required:
- podAffinityTerm
- weight
type: object
type: array
x-kubernetes-list-type: atomic
requiredDuringSchedulingIgnoredDuringExecution:
items:
properties:
labelSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
mismatchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
namespaceSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
namespaces:
items:
type: string
type: array
x-kubernetes-list-type: atomic
topologyKey:
type: string
required:
- topologyKey
type: object
type: array
x-kubernetes-list-type: atomic
type: object
type: object
containers:
items:
properties:
args:
items:
type: string
type: array
x-kubernetes-list-type: atomic
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
env:
items:
properties:
name:
type: string
value:
type: string
valueFrom:
properties:
configMapKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
properties:
apiVersion:
type: string
fieldPath:
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
properties:
containerName:
type: string
divisor:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
envFrom:
items:
properties:
configMapRef:
properties:
name:
default: ""
type: string
optional:
type: boolean
type: object
x-kubernetes-map-type: atomic
prefix:
type: string
secretRef:
properties:
name:
default: ""
type: string
optional:
type: boolean
type: object
x-kubernetes-map-type: atomic
type: object
type: array
x-kubernetes-list-type: atomic
image:
type: string
imagePullPolicy:
type: string
lifecycle:
properties:
postStart:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
sleep:
properties:
seconds:
format: int64
type: integer
required:
- seconds
type: object
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
type: object
preStop:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
sleep:
properties:
seconds:
format: int64
type: integer
required:
- seconds
type: object
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
type: object
type: object
livenessProbe:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
failureThreshold:
format: int32
type: integer
grpc:
properties:
port:
format: int32
type: integer
service:
default: ""
type: string
required:
- port
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
initialDelaySeconds:
format: int32
type: integer
periodSeconds:
format: int32
type: integer
successThreshold:
format: int32
type: integer
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
terminationGracePeriodSeconds:
format: int64
type: integer
timeoutSeconds:
format: int32
type: integer
type: object
name:
type: string
ports:
items:
properties:
containerPort:
format: int32
type: integer
hostIP:
type: string
hostPort:
format: int32
type: integer
name:
type: string
protocol:
default: TCP
type: string
required:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
failureThreshold:
format: int32
type: integer
grpc:
properties:
port:
format: int32
type: integer
service:
default: ""
type: string
required:
- port
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
initialDelaySeconds:
format: int32
type: integer
periodSeconds:
format: int32
type: integer
successThreshold:
format: int32
type: integer
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
terminationGracePeriodSeconds:
format: int64
type: integer
timeoutSeconds:
format: int32
type: integer
type: object
resizePolicy:
items:
properties:
resourceName:
type: string
restartPolicy:
type: string
required:
- resourceName
- restartPolicy
type: object
type: array
x-kubernetes-list-type: atomic
resources:
properties:
claims:
items:
properties:
name:
type: string
request:
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
restartPolicy:
type: string
securityContext:
properties:
allowPrivilegeEscalation:
type: boolean
appArmorProfile:
properties:
localhostProfile:
type: string
type:
type: string
required:
- type
type: object
capabilities:
properties:
add:
items:
type: string
type: array
x-kubernetes-list-type: atomic
drop:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
privileged:
type: boolean
procMount:
type: string
readOnlyRootFilesystem:
type: boolean
runAsGroup:
format: int64
type: integer
runAsNonRoot:
type: boolean
runAsUser:
format: int64
type: integer
seLinuxOptions:
properties:
level:
type: string
role:
type: string
type:
type: string
user:
type: string
type: object
seccompProfile:
properties:
localhostProfile:
type: string
type:
type: string
required:
- type
type: object
windowsOptions:
properties:
gmsaCredentialSpec:
type: string
gmsaCredentialSpecName:
type: string
hostProcess:
type: boolean
runAsUserName:
type: string
type: object
type: object
startupProbe:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
failureThreshold:
format: int32
type: integer
grpc:
properties:
port:
format: int32
type: integer
service:
default: ""
type: string
required:
- port
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
initialDelaySeconds:
format: int32
type: integer
periodSeconds:
format: int32
type: integer
successThreshold:
format: int32
type: integer
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
terminationGracePeriodSeconds:
format: int64
type: integer
timeoutSeconds:
format: int32
type: integer
type: object
stdin:
type: boolean
stdinOnce:
type: boolean
terminationMessagePath:
type: string
terminationMessagePolicy:
type: string
tty:
type: boolean
volumeDevices:
items:
properties:
devicePath:
type: string
name:
type: string
required:
- devicePath
- name
type: object
type: array
x-kubernetes-list-map-keys:
- devicePath
x-kubernetes-list-type: map
volumeMounts:
items:
properties:
mountPath:
type: string
mountPropagation:
type: string
name:
type: string
readOnly:
type: boolean
recursiveReadOnly:
type: string
subPath:
type: string
subPathExpr:
type: string
required:
- mountPath
- name
type: object
type: array
x-kubernetes-list-map-keys:
- mountPath
x-kubernetes-list-type: map
workingDir:
type: string
required:
- name
type: object
type: array
mainContainer:
properties:
args:
items:
type: string
type: array
x-kubernetes-list-type: atomic
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
env:
items:
properties:
name:
type: string
value:
type: string
valueFrom:
properties:
configMapKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
properties:
apiVersion:
type: string
fieldPath:
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
properties:
containerName:
type: string
divisor:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
envFrom:
items:
properties:
configMapRef:
properties:
name:
default: ""
type: string
optional:
type: boolean
type: object
x-kubernetes-map-type: atomic
prefix:
type: string
secretRef:
properties:
name:
default: ""
type: string
optional:
type: boolean
type: object
x-kubernetes-map-type: atomic
type: object
type: array
x-kubernetes-list-type: atomic
image:
type: string
imagePullPolicy:
type: string
lifecycle:
properties:
postStart:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
sleep:
properties:
seconds:
format: int64
type: integer
required:
- seconds
type: object
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
type: object
preStop:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
sleep:
properties:
seconds:
format: int64
type: integer
required:
- seconds
type: object
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
type: object
type: object
livenessProbe:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
failureThreshold:
format: int32
type: integer
grpc:
properties:
port:
format: int32
type: integer
service:
default: ""
type: string
required:
- port
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
initialDelaySeconds:
format: int32
type: integer
periodSeconds:
format: int32
type: integer
successThreshold:
format: int32
type: integer
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
terminationGracePeriodSeconds:
format: int64
type: integer
timeoutSeconds:
format: int32
type: integer
type: object
name:
type: string
ports:
items:
properties:
containerPort:
format: int32
type: integer
hostIP:
type: string
hostPort:
format: int32
type: integer
name:
type: string
protocol:
default: TCP
type: string
required:
- containerPort
type: object
type: array
x-kubernetes-list-map-keys:
- containerPort
- protocol
x-kubernetes-list-type: map
readinessProbe:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
failureThreshold:
format: int32
type: integer
grpc:
properties:
port:
format: int32
type: integer
service:
default: ""
type: string
required:
- port
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
initialDelaySeconds:
format: int32
type: integer
periodSeconds:
format: int32
type: integer
successThreshold:
format: int32
type: integer
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
terminationGracePeriodSeconds:
format: int64
type: integer
timeoutSeconds:
format: int32
type: integer
type: object
resizePolicy:
items:
properties:
resourceName:
type: string
restartPolicy:
type: string
required:
- resourceName
- restartPolicy
type: object
type: array
x-kubernetes-list-type: atomic
resources:
properties:
claims:
items:
properties:
name:
type: string
request:
type: string
required:
- name
type: object
type: array
x-kubernetes-list-map-keys:
- name
x-kubernetes-list-type: map
limits:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
requests:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
type: object
restartPolicy:
type: string
securityContext:
properties:
allowPrivilegeEscalation:
type: boolean
appArmorProfile:
properties:
localhostProfile:
type: string
type:
type: string
required:
- type
type: object
capabilities:
properties:
add:
items:
type: string
type: array
x-kubernetes-list-type: atomic
drop:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
privileged:
type: boolean
procMount:
type: string
readOnlyRootFilesystem:
type: boolean
runAsGroup:
format: int64
type: integer
runAsNonRoot:
type: boolean
runAsUser:
format: int64
type: integer
seLinuxOptions:
properties:
level:
type: string
role:
type: string
type:
type: string
user:
type: string
type: object
seccompProfile:
properties:
localhostProfile:
type: string
type:
type: string
required:
- type
type: object
windowsOptions:
properties:
gmsaCredentialSpec:
type: string
gmsaCredentialSpecName:
type: string
hostProcess:
type: boolean
runAsUserName:
type: string
type: object
type: object
startupProbe:
properties:
exec:
properties:
command:
items:
type: string
type: array
x-kubernetes-list-type: atomic
type: object
failureThreshold:
format: int32
type: integer
grpc:
properties:
port:
format: int32
type: integer
service:
default: ""
type: string
required:
- port
type: object
httpGet:
properties:
host:
type: string
httpHeaders:
items:
properties:
name:
type: string
value:
type: string
required:
- name
- value
type: object
type: array
x-kubernetes-list-type: atomic
path:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
scheme:
type: string
required:
- port
type: object
initialDelaySeconds:
format: int32
type: integer
periodSeconds:
format: int32
type: integer
successThreshold:
format: int32
type: integer
tcpSocket:
properties:
host:
type: string
port:
anyOf:
- type: integer
- type: string
x-kubernetes-int-or-string: true
required:
- port
type: object
terminationGracePeriodSeconds:
format: int64
type: integer
timeoutSeconds:
format: int32
type: integer
type: object
stdin:
type: boolean
stdinOnce:
type: boolean
terminationMessagePath:
type: string
terminationMessagePolicy:
type: string
tty:
type: boolean
volumeDevices:
items:
properties:
devicePath:
type: string
name:
type: string
required:
- devicePath
- name
type: object
type: array
x-kubernetes-list-map-keys:
- devicePath
x-kubernetes-list-type: map
volumeMounts:
items:
properties:
mountPath:
type: string
mountPropagation:
type: string
name:
type: string
readOnly:
type: boolean
recursiveReadOnly:
type: string
subPath:
type: string
subPathExpr:
type: string
required:
- mountPath
- name
type: object
type: array
x-kubernetes-list-map-keys:
- mountPath
x-kubernetes-list-type: map
workingDir:
type: string
required:
- name
type: object
nodeSelector:
additionalProperties:
type: string
type: object
priorityClassName:
type: string
schedulerName:
type: string
serviceAccountName:
type: string
tolerations:
items:
properties:
effect:
type: string
key:
type: string
operator:
type: string
tolerationSeconds:
format: int64
type: integer
value:
type: string
type: object
type: array
topologySpreadConstraints:
items:
properties:
labelSelector:
properties:
matchExpressions:
items:
properties:
key:
type: string
operator:
type: string
values:
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
type: object
type: object
x-kubernetes-map-type: atomic
matchLabelKeys:
items:
type: string
type: array
x-kubernetes-list-type: atomic
maxSkew:
format: int32
type: integer
minDomains:
format: int32
type: integer
nodeAffinityPolicy:
type: string
nodeTaintsPolicy:
type: string
topologyKey:
type: string
whenUnsatisfiable:
type: string
required:
- maxSkew
- topologyKey
- whenUnsatisfiable
type: object
type: array
type: object
imagePullSecrets:
items:
properties:
name:
default: ""
type: string
type: object
x-kubernetes-map-type: atomic
type: array
serviceName:
type: string
required:
- dynamoComponent
type: object
status:
properties:
conditions:
items:
properties:
lastTransitionTime:
format: date-time
type: string
message:
maxLength: 32768
type: string
observedGeneration:
format: int64
minimum: 0
type: integer
reason:
maxLength: 1024
minLength: 1
pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
type: string
status:
enum:
- "True"
- "False"
- Unknown
type: string
type:
maxLength: 316
pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
type: string
required:
- lastTransitionTime
- message
- reason
- status
- type
type: object
type: array
required:
- conditions
type: object
type: object
served: true
storage: true
subresources:
status: {}
......@@ -12,12 +12,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.16.4
helm.sh/resource-policy: keep
name: dynamographdeployments.nvidia.com
spec:
group: nvidia.com
......@@ -449,6 +451,8 @@ spec:
minReplicas:
type: integer
type: object
componentType:
type: string
dynamoNamespace:
type: string
envFromSecret:
......@@ -2335,8 +2339,7 @@ spec:
x-kubernetes-list-type: map
workingDir:
type: string
required:
- name
required: []
type: object
nodeSelector:
additionalProperties:
......@@ -2675,8 +2678,6 @@ spec:
type: string
type: object
type: object
required:
- dynamoGraph
type: object
status:
properties:
......
......@@ -24,8 +24,6 @@ rules:
resources:
- configmaps
- events
- pods
- secrets
- services
verbs:
- create
......@@ -47,7 +45,7 @@ rules:
- apiGroups:
- ""
resources:
- serviceaccounts
- pods
verbs:
- get
- list
......@@ -141,7 +139,6 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments
- dynamocomponents
- dynamographdeployments
verbs:
- create
......@@ -155,7 +152,6 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments/finalizers
- dynamocomponents/finalizers
- dynamographdeployments/finalizers
verbs:
- update
......@@ -163,7 +159,6 @@ rules:
- nvidia.com
resources:
- dynamocomponentdeployments/status
- dynamocomponents/status
- dynamographdeployments/status
verbs:
- get
......
......@@ -5,23 +5,13 @@ go 1.24.0
toolchain go1.24.3
require (
dario.cat/mergo v1.0.1
emperror.dev/errors v0.8.1
github.com/apparentlymart/go-shquot v0.0.1
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.9.1
github.com/bsm/gomega v1.27.10
github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589
github.com/goccy/go-yaml v1.18.0
github.com/google/go-cmp v0.7.0
github.com/google/go-containerregistry v0.20.5
github.com/huandu/xstrings v1.4.0
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/imdario/mergo v0.3.6
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.37.0
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/rs/xid v1.4.0
github.com/sergeymakinen/go-quote v1.1.0
github.com/sirupsen/logrus v1.9.3
go.etcd.io/etcd/client/v3 v3.5.16
istio.io/api v1.23.1
istio.io/client-go v1.23.1
......@@ -30,48 +20,17 @@ require (
k8s.io/apimachinery v0.32.3
k8s.io/client-go v0.32.3
k8s.io/utils v0.0.0-20241210054802-24370beab758
resty.dev/v3 v3.0.0-beta.2
sigs.k8s.io/controller-runtime v0.20.4
sigs.k8s.io/lws v0.6.1
volcano.sh/apis v1.11.0
)
require (
cloud.google.com/go/compute/metadata v0.6.0 // indirect
github.com/Azure/azure-sdk-for-go v46.4.0+incompatible // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
github.com/Azure/go-autorest/autorest v0.11.28 // indirect
github.com/Azure/go-autorest/autorest/adal v0.9.21 // indirect
github.com/Azure/go-autorest/autorest/azure/auth v0.5.11 // indirect
github.com/Azure/go-autorest/autorest/azure/cli v0.4.5 // indirect
github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
github.com/Azure/go-autorest/logger v0.2.1 // indirect
github.com/Azure/go-autorest/tracing v0.6.0 // indirect
github.com/aws/aws-sdk-go-v2 v1.36.1 // indirect
github.com/aws/aws-sdk-go-v2/config v1.29.6 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.17.59 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 // indirect
github.com/aws/aws-sdk-go-v2/service/ecr v1.40.3 // indirect
github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.31.2 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.24.15 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.33.14 // indirect
github.com/aws/smithy-go v1.22.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dimchansky/utfbom v1.1.1 // indirect
github.com/docker/cli v28.1.1+incompatible // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker-credential-helpers v0.9.3 // indirect
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/evanphx/json-patch v5.7.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.11 // indirect
......@@ -84,7 +43,6 @@ require (
github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-task/slim-sprig/v3 v3.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
......@@ -95,12 +53,9 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.20.2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
......@@ -108,7 +63,7 @@ require (
github.com/prometheus/procfs v0.15.1 // indirect
github.com/rogpeppe/go-internal v1.13.1 // indirect
github.com/spf13/pflag v1.0.6 // indirect
github.com/vbatts/tar-split v0.12.1 // indirect
github.com/stretchr/testify v1.10.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
go.etcd.io/etcd/api/v3 v3.5.16 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.16 // indirect
......@@ -118,7 +73,6 @@ require (
go.uber.org/automaxprocs v1.6.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/crypto v0.38.0 // indirect
golang.org/x/net v0.40.0 // indirect
golang.org/x/oauth2 v0.30.0 // indirect
golang.org/x/sync v0.14.0 // indirect
......@@ -135,7 +89,6 @@ require (
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gotest.tools/v3 v3.5.2 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
......
cloud.google.com/go/compute/metadata v0.6.0 h1:A6hENjEsCDtC1k8byVsgwvVcioamEHvZ4j01OwKxG9I=
cloud.google.com/go/compute/metadata v0.6.0/go.mod h1:FjyFAW1MW0C203CEOMDTu3Dk1FlqW3Rga40jzHL4hfg=
dario.cat/mergo v1.0.1 h1:Ra4+bf83h2ztPIQYNP99R6m+Y7KfnARDfID+a+vLl4s=
dario.cat/mergo v1.0.1/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
emperror.dev/errors v0.8.1 h1:UavXZ5cSX/4u9iyvH6aDcuGkVjeexUGJ7Ij7G4VfQT0=
emperror.dev/errors v0.8.1/go.mod h1:YcRvLPh626Ubn2xqtoprejnA5nFha+TJ+2vew48kWuE=
github.com/Azure/azure-sdk-for-go v46.4.0+incompatible h1:fCN6Pi+tEiEwFa8RSmtVlFHRXEZ+DJm9gfx/MKqYWw4=
github.com/Azure/azure-sdk-for-go v46.4.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest/autorest v0.11.24/go.mod h1:G6kyRlFnTuSbEYkQGawPfsCswgme4iYf6rfSKUDzbCc=
github.com/Azure/go-autorest/autorest v0.11.28 h1:ndAExarwr5Y+GaHE6VCaY1kyS/HwwGGyuimVhWsHOEM=
github.com/Azure/go-autorest/autorest v0.11.28/go.mod h1:MrkzG3Y3AH668QyF9KRk5neJnGgmhQ6krbhR8Q5eMvA=
github.com/Azure/go-autorest/autorest/adal v0.9.18/go.mod h1:XVVeme+LZwABT8K5Lc3hA4nAe8LDBVle26gTrguhhPQ=
github.com/Azure/go-autorest/autorest/adal v0.9.21 h1:jjQnVFXPfekaqb8vIsv2G1lxshoW+oGv4MDlhRtnYZk=
github.com/Azure/go-autorest/autorest/adal v0.9.21/go.mod h1:zua7mBUaCc5YnSLKYgGJR/w5ePdMDA6H56upLsHzA9U=
github.com/Azure/go-autorest/autorest/azure/auth v0.5.11 h1:P6bYXFoao05z5uhOQzbC3Qd8JqF3jUoocoTeIxkp2cA=
github.com/Azure/go-autorest/autorest/azure/auth v0.5.11/go.mod h1:84w/uV8E37feW2NCJ08uT9VBfjfUHpgLVnG2InYD6cg=
github.com/Azure/go-autorest/autorest/azure/cli v0.4.5 h1:0W/yGmFdTIT77fvdlGZ0LMISoLHFJ7Tx4U0yeB+uFs4=
github.com/Azure/go-autorest/autorest/azure/cli v0.4.5/go.mod h1:ADQAXrkgm7acgWVUNamOgh8YNrv4p27l3Wc55oVfpzg=
github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw=
github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=
github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
github.com/Azure/go-autorest/autorest/mocks v0.4.2 h1:PGN4EDXnuQbojHbU0UWoNvmu9AGVwYHG9/fkDYhtAfw=
github.com/Azure/go-autorest/autorest/mocks v0.4.2/go.mod h1:Vy7OitM9Kei0i1Oj+LvyAWMXJHeKH1MVlzFugfVrmyU=
github.com/Azure/go-autorest/logger v0.2.1 h1:IG7i4p/mDa2Ce4TRyAO8IHnVhAVF3RFU+ZtXWSmf4Tg=
github.com/Azure/go-autorest/logger v0.2.1/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo=
github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
github.com/apparentlymart/go-shquot v0.0.1 h1:MGV8lwxF4zw75lN7e0MGs7o6AFYn7L6AZaExUpLh0Mo=
github.com/apparentlymart/go-shquot v0.0.1/go.mod h1:lw58XsE5IgUXZ9h0cxnypdx31p9mPFIVEQ9P3c7MlrU=
github.com/aws/aws-sdk-go-v2 v1.36.1 h1:iTDl5U6oAhkNPba0e1t1hrwAo02ZMqbrGq4k5JBWM5E=
github.com/aws/aws-sdk-go-v2 v1.36.1/go.mod h1:5PMILGVKiW32oDzjj6RU52yrNrDPUHcbZQYr1sM7qmM=
github.com/aws/aws-sdk-go-v2/config v1.29.6 h1:fqgqEKK5HaZVWLQoLiC9Q+xDlSp+1LYidp6ybGE2OGg=
github.com/aws/aws-sdk-go-v2/config v1.29.6/go.mod h1:Ft+WLODzDQmCTHDvqAH1JfC2xxbZ0MxpZAcJqmE1LTQ=
github.com/aws/aws-sdk-go-v2/credentials v1.17.59 h1:9btwmrt//Q6JcSdgJOLI98sdr5p7tssS9yAsGe8aKP4=
github.com/aws/aws-sdk-go-v2/credentials v1.17.59/go.mod h1:NM8fM6ovI3zak23UISdWidyZuI1ghNe2xjzUZAyT+08=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28 h1:KwsodFKVQTlI5EyhRSugALzsV6mG/SGrdjlMXSZSdso=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.28/go.mod h1:EY3APf9MzygVhKuPXAc5H+MkGb8k/DOSQjWS0LgkKqI=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32 h1:BjUcr3X3K0wZPGFg2bxOWW3VPN8rkE3/61zhP+IHviA=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.32/go.mod h1:80+OGC/bgzzFFTUmcuwD0lb4YutwQeKLFpmt6hoWapU=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32 h1:m1GeXHVMJsRsUAqG6HjZWx9dj7F5TR+cF1bjyfYyBd4=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.32/go.mod h1:IitoQxGfaKdVLNg0hD8/DXmAqNy0H4K2H2Sf91ti8sI=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2 h1:Pg9URiobXy85kgFev3og2CuOZ8JZUBENF+dcgWBaYNk=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.2/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc=
github.com/aws/aws-sdk-go-v2/service/ecr v1.40.3 h1:a+210FCU/pR5hhKRaskRfX/ogcyyzFBrehcTk5DTAyU=
github.com/aws/aws-sdk-go-v2/service/ecr v1.40.3/go.mod h1:dtD3a4sjUjVL86e0NUvaqdGvds5ED6itUiZPDaT+Gh8=
github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.31.2 h1:E6/Myrj9HgLF22medmDrKmbpm4ULsa+cIBNx3phirBk=
github.com/aws/aws-sdk-go-v2/service/ecrpublic v1.31.2/go.mod h1:OQ8NALFcchBJ/qruak6zKUQodovnTKKaReTuCkc5/9Y=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2 h1:D4oz8/CzT9bAEYtVhSBmFj2dNOtaHOtMKc2vHBwYizA=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.2/go.mod h1:Za3IHqTQ+yNcRHxu1OFucBh0ACZT4j4VQFF0BqpZcLY=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13 h1:SYVGSFQHlchIcy6e7x12bsrxClCXSP5et8cqVhL8cuw=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.13/go.mod h1:kizuDaLX37bG5WZaoxGPQR/LNFXpxp0vsUnqfkWXfNE=
github.com/aws/aws-sdk-go-v2/service/sso v1.24.15 h1:/eE3DogBjYlvlbhd2ssWyeuovWunHLxfgw3s/OJa4GQ=
github.com/aws/aws-sdk-go-v2/service/sso v1.24.15/go.mod h1:2PCJYpi7EKeA5SkStAmZlF6fi0uUABuhtF8ILHjGc3Y=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14 h1:M/zwXiL2iXUrHputuXgmO94TVNmcenPHxgLXLutodKE=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.14/go.mod h1:RVwIw3y/IqxC2YEXSIkAzRDdEU1iRabDPaYjpGCbCGQ=
github.com/aws/aws-sdk-go-v2/service/sts v1.33.14 h1:TzeR06UCMUq+KA3bDkujxK1GVGy+G8qQN/QVYzGLkQE=
github.com/aws/aws-sdk-go-v2/service/sts v1.33.14/go.mod h1:dspXf/oYWGWo6DEvj98wpaTeqt5+DMidZD0A9BYTizc=
github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ=
github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.9.1 h1:50sS0RWhGpW/yZx2KcDNEb1u1MANv5BMEkJgcieEDTA=
github.com/awslabs/amazon-ecr-credential-helper/ecr-login v0.9.1/go.mod h1:ErZOtbzuHabipRTDTor0inoRlYwbsV1ovwSxjGs/uJo=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589 h1:krfRl01rzPzxSxyLyrChD+U+MzsBXbm0OwYYB67uF+4=
github.com/chrismellard/docker-credential-acr-env v0.0.0-20230304212654-82a0ddb27589/go.mod h1:OuDyvmLnMCwa2ep4Jkm6nyA0ocJuZlGyk2gGseVzERM=
github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8=
github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU=
github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4=
github.com/coreos/go-semver v0.3.1/go.mod h1:irMmmIw/7yzSRPWryHsK7EYSg09caPQL03VsM8rvUec=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
......@@ -79,14 +14,6 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dimchansky/utfbom v1.1.1 h1:vV6w1AhK4VMnhBno/TPVCoK9U/LP0PkLCS9tbxHdi/U=
github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE=
github.com/docker/cli v28.1.1+incompatible h1:eyUemzeI45DY7eDPuwUcmDyDj1pM98oD5MdSpiItp8k=
github.com/docker/cli v28.1.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8=
github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo=
github.com/emicklei/go-restful/v3 v3.12.0 h1:y2DdzBAURM29NFF94q6RaY4vjIH1rtwDapwQtU84iWk=
github.com/emicklei/go-restful/v3 v3.12.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ2tG6yudJd8LBksgI=
......@@ -111,15 +38,9 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
github.com/golang-jwt/jwt/v4 v4.2.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg=
github.com/golang-jwt/jwt/v4 v4.5.2 h1:YtQM7lnr8iZ+j5q71MGKkNw9Mn7AjHM68uc9g5fXeUI=
github.com/golang-jwt/jwt/v4 v4.5.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
......@@ -129,8 +50,6 @@ github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYu
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/go-containerregistry v0.20.5 h1:4RnlYcDs5hoA++CeFjlbZ/U9Yp1EuWr+UhhTyYQjOP0=
github.com/google/go-containerregistry v0.20.5/go.mod h1:Q14vdOOzug02bwnhMkZKD4e30pDaD9W65qzXpyzF49E=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
......@@ -138,8 +57,8 @@ github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU=
github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28=
github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
......@@ -156,10 +75,6 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/hashstructure/v2 v2.0.2 h1:vGKWl0YJqUNxE8d+h8f6NJLcCJrgbhC4NcD46KavDd4=
github.com/mitchellh/hashstructure/v2 v2.0.2/go.mod h1:MG3aRVU/N29oo/V/IhBX8GR/zz4kQkprJgF2EVszyDE=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
......@@ -171,10 +86,6 @@ github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus
github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8=
github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y=
github.com/onsi/gomega v1.37.0/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
......@@ -194,21 +105,12 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/rs/xid v1.4.0 h1:qd7wPTDkN6KQx2VmMBLrpHkiyQwgFXRnkOLacUiaSNY=
github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
github.com/sergeymakinen/go-quote v1.1.0 h1:mwCRejFVH26bf6TFaBNdXixeB5LtNU1yVHrfsNAmnjc=
github.com/sergeymakinen/go-quote v1.1.0/go.mod h1:AuXYBfIQbIXlzf9KawRyfSxc/YGAyVLtMUUtmc5oGHA=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/vbatts/tar-split v0.12.1 h1:CqKoORW7BUWBe7UL/iqTVvkTBOF8UvOMKOIZykxnnbo=
github.com/vbatts/tar-split v0.12.1/go.mod h1:eF6B6i6ftWQcDqEn3/iGFRFRo8cBIMSJVOpnNdfTMFA=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
......@@ -244,19 +146,12 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8=
golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY=
golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds=
golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
......@@ -269,18 +164,12 @@ golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ=
......@@ -312,11 +201,10 @@ gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSP
gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
istio.io/api v1.23.1 h1:bm2XF0j058FfzWVHUfpmMj4sFDkcD1X609qs5AU97Pc=
istio.io/api v1.23.1/go.mod h1:QPSTGXuIQdnZFEm3myf9NZ5uBMwCdJWUvfj9ZZ+2oBM=
istio.io/client-go v1.23.1 h1:IX2cgUUXnVYo+9H6bFGSp/vuKVLPUkmiN8qk1/mvsYs=
......@@ -335,8 +223,6 @@ k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJ
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4=
k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0=
k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
resty.dev/v3 v3.0.0-beta.2 h1:xu4mGAdbCLuc3kbk7eddWfWm4JfhwDtdapwss5nCjnQ=
resty.dev/v3 v3.0.0-beta.2/go.mod h1:OgkqiPvTDtOuV4MGZuUDhwOpkY8enjOsjjMzeOHefy4=
sigs.k8s.io/controller-runtime v0.20.4 h1:X3c+Odnxz+iPTRobG4tp092+CvBU9UK0t/bRf+n0DGU=
sigs.k8s.io/controller-runtime v0.20.4/go.mod h1:xg2XB0K5ShQzAgsoujxuKN4LNXR2LfwwHsPj7Iaw+XY=
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 h1:/Rv+M11QRah1itp8VhT6HoVx1Ray9eB4DBr+K+/sCJ8=
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package archive
import (
"archive/tar"
"bytes"
"fmt"
"io"
"path/filepath"
)
// ExtractFileFromTar walks the tar archive held in tarData and returns the
// contents of the first regular-file entry whose full name, or whose base
// name, equals fileName.
//
// An error is returned when the archive cannot be read, when the matching
// entry cannot be extracted, or when no entry matches fileName.
func ExtractFileFromTar(tarData []byte, fileName string) (*bytes.Buffer, error) {
	tr := tar.NewReader(bytes.NewReader(tarData))

	for {
		entry, err := tr.Next()
		switch {
		case err == io.EOF:
			// Reached the end of the archive without finding a match.
			return nil, fmt.Errorf("file %s not found in tar archive", fileName)
		case err != nil:
			return nil, fmt.Errorf("error reading tar file: %w", err)
		}

		// Only regular files are candidates; directories, links, etc. are skipped.
		if entry.Typeflag != tar.TypeReg {
			continue
		}
		// Accept either an exact path match or a match on the final path element.
		if entry.Name != fileName && filepath.Base(entry.Name) != fileName {
			continue
		}

		extracted := new(bytes.Buffer)
		if _, err := extracted.ReadFrom(tr); err != nil {
			return nil, fmt.Errorf("error extracting file: %w", err)
		}
		return extracted, nil
	}
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package archive
import (
"bytes"
"os"
"reflect"
"testing"
)
// TestExtractFileFromTar runs ExtractFileFromTar against the test.tar and
// test2.tar fixtures read from the current working directory, and against a
// byte slice that is not a tar archive.
func TestExtractFileFromTar(t *testing.T) {
	// test.tar contains test.yaml at the root of the archive.
	tarData, err := os.ReadFile("test.tar")
	if err != nil {
		t.Fatalf("Failed to read test.tar: %v", err)
	}
	// test2.tar holds its YAML file inside a folder.
	// NOTE(review): the previous comment called the nested file test2.yaml, but
	// the case below looks up test.yaml in this archive — confirm the fixture.
	tarData2, err := os.ReadFile("test2.tar")
	if err != nil {
		t.Fatalf("Failed to read test2.tar: %v", err)
	}
	type args struct {
		tarData      []byte
		yamlFileName string
	}
	// Every case carries a distinct name so a failing subtest can be identified
	// and selected with -run without relying on the "#01" suffix Go appends to
	// duplicate names.
	tests := []struct {
		name    string
		args    args
		want    *bytes.Buffer
		wantErr bool
	}{
		{
			name: "Test ExtractFileFromTar",
			args: args{
				tarData:      tarData,
				yamlFileName: "test.yaml",
			},
			want:    bytes.NewBufferString("property1: true\n"),
			wantErr: false,
		},
		{
			name: "Test ExtractFileFromTar, file inside a folder",
			args: args{
				tarData:      tarData2,
				yamlFileName: "test.yaml",
			},
			want:    bytes.NewBufferString("property1: true\n"),
			wantErr: false,
		},
		{
			name: "Test ExtractFileFromTar, file not found",
			args: args{
				tarData:      tarData,
				yamlFileName: "test2.yaml",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "Test ExtractFileFromTar, invalid content",
			args: args{
				tarData:      []byte("invalid content"),
				yamlFileName: "test.yaml",
			},
			want:    nil,
			wantErr: true,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ExtractFileFromTar(tt.args.tarData, tt.args.yamlFileName)
			if (err != nil) != tt.wantErr {
				t.Errorf("ExtractFileFromTar() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("ExtractFileFromTar() = %v, want %v", got, tt.want)
			}
		})
	}
}
package common
import (
"fmt"
"net/url"
"strings"
)
// GetHost returns the host component of someURL, including the port when one
// is present.
//
// someURL may omit the scheme (for example "docker.io" or
// "registry.example.com:5005/path"). In that case a placeholder scheme is
// prepended before parsing so that net/url treats the leading segment as the
// host instead of as a path.
//
// An error is returned when the value cannot be parsed or when the parsed URL
// has no host (for example, the empty string).
func GetHost(someURL string) (string, error) {
	rawURL := someURL
	// Add scheme if not present
	if !strings.Contains(rawURL, "://") {
		rawURL = "dummy://" + rawURL
	}
	// Named "parsed" rather than "url" so the net/url package is not shadowed.
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}
	if parsed.Host == "" {
		// Report the caller's input, not the internally prefixed form.
		return "", fmt.Errorf("no host found in URL %q", someURL)
	}
	return parsed.Host, nil
}
package common
import "testing"
// TestGetHost checks GetHost against registry-style inputs with and without a
// scheme, port and path, plus the empty string which must yield an error.
func TestGetHost(t *testing.T) {
	cases := []struct {
		name    string
		input   string
		want    string
		wantErr bool
	}{
		{
			name:  "docker.io",
			input: "docker.io",
			want:  "docker.io",
		},
		{
			name:  "gitlab-master.nvidia.com:5005",
			input: "gitlab-master.nvidia.com:5005",
			want:  "gitlab-master.nvidia.com:5005",
		},
		{
			name:  "gitlab-master.nvidia.com:5005/registry",
			input: "gitlab-master.nvidia.com:5005/registry",
			want:  "gitlab-master.nvidia.com:5005",
		},
		{
			name:  "https://gitlab-master.nvidia.com",
			input: "https://gitlab-master.nvidia.com",
			want:  "gitlab-master.nvidia.com",
		},
		{
			name:  "https://gitlab-master.nvidia.com:5005/registry",
			input: "https://gitlab-master.nvidia.com:5005/registry",
			want:  "gitlab-master.nvidia.com:5005",
		},
		{
			name:    "empty",
			input:   "",
			want:    "",
			wantErr: true,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			host, err := GetHost(tc.input)
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("GetHost() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if host != tc.want {
				t.Errorf("GetHost() = %v, want %v", host, tc.want)
			}
		})
	}
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package config
import (
"context"
"os"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
)
// GetDynamoImageBuilderNamespace returns the value of the environment variable
// named by consts.EnvDynamoImageBuilderNamespace (empty when it is unset).
// ctx is not used and the returned error is always nil.
func GetDynamoImageBuilderNamespace(ctx context.Context) (namespace string, err error) {
	namespace = os.Getenv(consts.EnvDynamoImageBuilderNamespace)
	return namespace, nil
}
// DockerRegistryConfig holds the docker registry settings that
// GetDockerRegistryConfig reads from the process environment.
type DockerRegistryConfig struct {
	// DynamoComponentsRepositoryName is read from
	// consts.EnvDockerRegistryDynamoComponentsRepositoryName.
	DynamoComponentsRepositoryName string `yaml:"dynamo_components_repository_name"`
	// Server is read from consts.EnvDockerRegistryServer.
	Server string `yaml:"server"`
	// SecretName is read from consts.EnvDockerRegistrySecret.
	SecretName string `yaml:"secret_name"`
	// Secure is true only when consts.EnvDockerRegistrySecure is exactly "true".
	Secure bool `yaml:"secure"`
}
// GetDockerRegistryConfig builds a new DockerRegistryConfig from the process
// environment. Unset variables yield empty strings; Secure is true only when
// its variable holds exactly "true".
func GetDockerRegistryConfig() *DockerRegistryConfig {
	cfg := new(DockerRegistryConfig)
	cfg.DynamoComponentsRepositoryName = os.Getenv(consts.EnvDockerRegistryDynamoComponentsRepositoryName)
	cfg.Server = os.Getenv(consts.EnvDockerRegistryServer)
	cfg.SecretName = os.Getenv(consts.EnvDockerRegistrySecret)
	secureFlag := os.Getenv(consts.EnvDockerRegistrySecure)
	cfg.Secure = secureFlag == "true"
	return cfg
}
// ApiStoreConfig holds the API store connection settings that
// GetApiStoreConfig reads from the process environment.
type ApiStoreConfig struct {
	// Endpoint is read from consts.EnvApiStoreEndpoint.
	Endpoint string `yaml:"endpoint"`
	// ClusterName is read from consts.EnvApiStoreClusterName.
	ClusterName string `yaml:"cluster_name"`
	// ApiToken is read from consts.EnvApiStoreApiToken.
	ApiToken string `yaml:"api_token"`
}
// GetApiStoreConfig builds a new ApiStoreConfig from the process environment.
// Unset variables yield empty strings. ctx is not used and the returned error
// is always nil.
func GetApiStoreConfig(ctx context.Context) (conf *ApiStoreConfig, err error) {
	conf = new(ApiStoreConfig)
	conf.Endpoint = os.Getenv(consts.EnvApiStoreEndpoint)
	conf.ClusterName = os.Getenv(consts.EnvApiStoreClusterName)
	conf.ApiToken = os.Getenv(consts.EnvApiStoreApiToken)
	return conf, nil
}
// getEnv returns the value of the environment variable key, or fallback when
// the variable is not set at all. A variable that is set to the empty string
// is returned as "" and does not trigger the fallback.
func getEnv(key, fallback string) string {
	value, isSet := os.LookupEnv(key)
	if !isSet {
		return fallback
	}
	return value
}
// InternalImages holds the image references that GetInternalImages resolves
// from the environment, each with a default taken from the consts package.
type InternalImages struct {
	// DynamoComponentsDownloader comes from
	// consts.EnvInternalImagesDynamoComponentsDownloader.
	DynamoComponentsDownloader string
	// Kaniko comes from consts.EnvInternalImagesKaniko.
	Kaniko string
	// Buildkit comes from consts.EnvInternalImagesBuildkit.
	Buildkit string
	// BuildkitRootless comes from consts.EnvInternalImagesBuildkitRootless.
	BuildkitRootless string
}
// GetInternalImages resolves every internal image reference from its
// environment variable, falling back to the matching default constant when the
// variable is not set.
func GetInternalImages() (conf *InternalImages) {
	return &InternalImages{
		DynamoComponentsDownloader: getEnv(consts.EnvInternalImagesDynamoComponentsDownloader, consts.InternalImagesDynamoComponentsDownloaderDefault),
		Kaniko:                     getEnv(consts.EnvInternalImagesKaniko, consts.InternalImagesKanikoDefault),
		Buildkit:                   getEnv(consts.EnvInternalImagesBuildkit, consts.InternalImagesBuildkitDefault),
		BuildkitRootless:           getEnv(consts.EnvInternalImagesBuildkitRootless, consts.InternalImagesBuildkitRootlessDefault),
	}
}
......@@ -13,63 +13,24 @@ const (
DynamoHealthPort = 5000
DynamoHealthPortName = "health"
DynamoImageBuilderComponentName = "dynamo-image-builder"
DynamoApiServerComponentName = "api-server"
InternalImagesDynamoComponentsDownloaderDefault = "rapidfort/curl:latest"
InternalImagesKanikoDefault = "gcr.io/kaniko-project/executor:debug"
InternalImagesBuildkitDefault = "moby/buildkit:v0.20.2"
InternalImagesBuildkitRootlessDefault = "moby/buildkit:v0.20.2-rootless"
EnvApiStoreEndpoint = "API_STORE_ENDPOINT"
EnvApiStoreClusterName = "API_STORE_CLUSTER_NAME"
// nolint: gosec
EnvApiStoreApiToken = "API_STORE_API_TOKEN"
EnvDynamoServicePort = "DYNAMO_PORT"
EnvDockerRegistryServer = "DOCKER_REGISTRY_SERVER"
EnvDockerRegistrySecret = "DOCKER_REGISTRY_SECRET_NAME"
EnvDockerRegistrySecure = "DOCKER_REGISTRY_SECURE"
EnvDockerRegistryDynamoComponentsRepositoryName = "DOCKER_REGISTRY_DYNAMO_COMPONENTS_REPOSITORY_NAME"
EnvInternalImagesDynamoComponentsDownloader = "INTERNAL_IMAGES_DYNAMO_COMPONENTS_DOWNLOADER"
EnvInternalImagesKaniko = "INTERNAL_IMAGES_KANIKO"
EnvInternalImagesBuildkit = "INTERNAL_IMAGES_BUILDKIT"
EnvInternalImagesBuildkitRootless = "INTERNAL_IMAGES_BUILDKIT_ROOTLESS"
EnvDynamoSystemNamespace = "DYNAMO_SYSTEM_NAMESPACE"
EnvDynamoImageBuilderNamespace = "DYNAMO_IMAGE_BUILDER_NAMESPACE"
KubeLabelDynamoSelector = "nvidia.com/selector"
KubeLabelDynamoComponent = "nvidia.com/dynamo-component"
KubeLabelDynamoNamespace = "nvidia.com/dynamo-namespace"
KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type"
KubeLabelDynamoComponentType = "nvidia.com/dynamo-component-type"
KubeLabelIsDynamoImageBuilder = "nvidia.com/is-dynamo-image-builder"
KubeLabelValueFalse = "false"
KubeLabelValueTrue = "true"
KubeLabelDynamoImageBuilderPod = "nvidia.com/dynamo-image-builder-pod"
KubeLabelDynamoComponentPod = "nvidia.com/dynamo-component-pod"
KubeAnnotationDynamoRepository = "nvidia.com/dynamo-repository"
KubeAnnotationDynamoVersion = "nvidia.com/dynamo-version"
KubeAnnotationDynamoDockerRegistryInsecure = "nvidia.com/docker-registry-insecure"
KubeResourceGPUNvidia = "nvidia.com/gpu"
KubeAnnotationDynamoComponentHash = "nvidia.com/dynamo-request-hash"
KubeAnnotationDynamoComponentImageBuiderHash = "nvidia.com/dynamo-request-image-builder-hash"
KubeAnnotationDynamoComponentStorageNS = "nvidia.com/dynamo-storage-namespace"
DynamoDeploymentConfigEnvVar = "DYN_DEPLOYMENT_CONFIG"
DockerConfigVolumeName = "docker-config"
DockerConfigVolumeMountPath = "/docker-config/.docker"
ComponentTypePlanner = "planner"
ComponentTypeMain = "main"
PlannerServiceAccountName = "planner-serviceaccount"
)
......@@ -19,7 +19,6 @@ package controller
import (
"fmt"
"strings"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1"
......@@ -64,13 +63,7 @@ func getIngressHost(ingressSpec v1alpha1.IngressSpec) string {
return fmt.Sprintf("%s.%s", host, ingressSuffix)
}
// getK8sName returns value with every ":" replaced by "--".
func getK8sName(value string) string {
	segments := strings.Split(value, ":")
	return strings.Join(segments, "--")
}
func isGoogleRegistry(host string) bool {
return host == "gcr.io" ||
strings.HasSuffix(host, ".gcr.io") ||
strings.HasSuffix(host, ".pkg.dev") ||
strings.HasSuffix(host, ".google.com")
// dockerSecretRetriever looks up the secrets associated with a docker registry.
type dockerSecretRetriever interface {
	// GetSecrets returns a list of secret names associated with the docker
	// registry, given a namespace and a registry.
	GetSecrets(namespace, registry string) ([]string, error)
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 Atalaya Tech. Inc
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
*/
package controller
import (
"bytes"
"context"
"crypto/md5"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"text/template"
"time"
"emperror.dev/errors"
commonconfig "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/config"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/apparentlymart/go-shquot/shquot"
"github.com/awslabs/amazon-ecr-credential-helper/ecr-login"
"github.com/chrismellard/docker-credential-acr-env/pkg/credhelper"
"github.com/goccy/go-yaml"
"github.com/google/go-containerregistry/pkg/authn"
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/v1/google"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/huandu/xstrings"
"github.com/mitchellh/hashstructure/v2"
"github.com/rs/xid"
"github.com/sergeymakinen/go-quote/unix"
"github.com/sirupsen/logrus"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
apiStoreClient "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/api_store_client"
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/schemas"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
)
// DynamoComponentReconciler reconciles a DynamoComponent object
type DynamoComponentReconciler struct {
	// Client is the embedded Kubernetes client used to get, list, create and
	// delete objects and to update DynamoComponent status.
	client.Client
	// Scheme is passed to ctrl.SetControllerReference when owning created Jobs.
	Scheme *runtime.Scheme
	// Recorder emits Kubernetes events about reconciliation progress and errors.
	Recorder record.EventRecorder
	// Config carries controller-wide configuration.
	// NOTE(review): not read by the methods visible in this part of the file.
	Config controller_common.Config
}
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamocomponents,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamocomponents/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamocomponents/finalizers,verbs=update
//+kubebuilder:rbac:groups=nvidia.com,resources=dynamocomponents,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=nvidia.com,resources=dynamocomponents/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=events.k8s.io,resources=events,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch
//+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch;delete
// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
//
// It fetches the DynamoComponent named by req and returns without error when
// the object no longer exists or already reports IsReady. On first sight (no
// status conditions yet) it seeds the ImageBuilding, ImageExists and
// DynamoComponentAvailable conditions as Unknown. It then delegates to
// ensureImageExists and sets DynamoComponentAvailable to True when the image
// exists, or to Unknown when it does not exist yet. When reconciliation
// returns an error, the deferred handler records a warning event and sets
// DynamoComponentAvailable to False with the error text.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.18.2/pkg/reconcile
//
//nolint:gocyclo,nakedret
func (r *DynamoComponentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, err error) {
	logs := log.FromContext(ctx)
	DynamoComponent := &nvidiacomv1alpha1.DynamoComponent{}
	err = r.Get(ctx, req.NamespacedName, DynamoComponent)
	if err != nil {
		if k8serrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			logs.Info("DynamoComponent resource not found. Ignoring since object must be deleted")
			err = nil
			return
		}
		// Error reading the object - requeue the request.
		logs.Error(err, "Failed to get DynamoComponent")
		return
	}
	// Nothing left to do once the component reports ready.
	if DynamoComponent.IsReady() {
		logs.Info("Skip available DynamoComponent")
		return
	}
	// First reconciliation of this object: seed all conditions as Unknown.
	if len(DynamoComponent.Status.Conditions) == 0 {
		DynamoComponent, err = r.setStatusConditions(ctx, req,
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding,
				Status:  metav1.ConditionUnknown,
				Reason:  "Reconciling",
				Message: "Starting to reconcile DynamoComponent",
			},
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExists,
				Status:  metav1.ConditionUnknown,
				Reason:  "Reconciling",
				Message: "Starting to reconcile DynamoComponent",
			},
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeDynamoComponentAvailable,
				Status:  metav1.ConditionUnknown,
				Reason:  "Reconciling",
				Message: "Reconciling",
			},
		)
		if err != nil {
			return
		}
	}
	logs = logs.WithValues("DynamoComponent", DynamoComponent.Name, "DynamoComponentNamespace", DynamoComponent.Namespace)
	// The deferred handler reads the named result err, so it observes whatever
	// error the code below assigns before returning.
	defer func() {
		if err == nil {
			logs.Info("Reconcile success")
			return
		}
		logs.Error(err, "Failed to reconcile DynamoComponent.")
		r.Recorder.Eventf(DynamoComponent, corev1.EventTypeWarning, "ReconcileError", "Failed to reconcile DynamoComponent: %v", err)
		// A failure to record the status is only logged; the original err is
		// what Reconcile returns.
		_, err_ := r.setStatusConditions(ctx, req,
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeDynamoComponentAvailable,
				Status:  metav1.ConditionFalse,
				Reason:  "Reconciling",
				Message: err.Error(),
			},
		)
		if err_ != nil {
			logs.Error(err_, "Failed to update DynamoComponent status")
			return
		}
	}()
	// DynamoComponent and err are already declared in this scope, so ":=" here
	// assigns to them and only introduces imageExists and imageExistsResult.
	DynamoComponent, _, imageExists, imageExistsResult, err := r.ensureImageExists(ctx, ensureImageExistsOption{
		DynamoComponent: DynamoComponent,
		req:             req,
	})
	if err != nil {
		err = errors.Wrapf(err, "ensure image exists")
		return
	}
	if !imageExists {
		result = imageExistsResult
		// NOTE(review): this branch also runs after ensureImageExists reported a
		// failed build job and set DynamoComponentAvailable to False, so that
		// False status is overwritten with Unknown here - confirm this is intended.
		DynamoComponent, err = r.setStatusConditions(ctx, req,
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeDynamoComponentAvailable,
				Status:  metav1.ConditionUnknown,
				Reason:  "Reconciling",
				Message: "DynamoComponent image is building",
			},
		)
		if err != nil {
			return
		}
		return
	}
	// The image exists: mark the component available.
	DynamoComponent, err = r.setStatusConditions(ctx, req,
		metav1.Condition{
			Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeDynamoComponentAvailable,
			Status:  metav1.ConditionTrue,
			Reason:  "Reconciling",
			Message: "DynamoComponent image is generated",
		},
	)
	if err != nil {
		return
	}
	return
}
// isEstargzEnabled reports whether the ESTARGZ_ENABLED environment variable
// equals commonconsts.KubeLabelValueTrue.
func isEstargzEnabled() bool {
	estargz := os.Getenv("ESTARGZ_ENABLED")
	return estargz == commonconsts.KubeLabelValueTrue
}
// ensureImageExistsOption bundles the inputs of ensureImageExists.
type ensureImageExistsOption struct {
	// DynamoComponent is the object whose image is checked and, if needed, built.
	DynamoComponent *nvidiacomv1alpha1.DynamoComponent
	// req is the reconcile request; its NamespacedName is used to re-fetch the
	// object and its Namespace to list image builder jobs.
	req ctrl.Request
}
// ensureImageExists determines whether the image for opt.DynamoComponent
// exists and, when it does not, drives an image builder Job for it.
//
// Steps, in order:
//  1. Unless the ImageExistsChecked condition is already True with the computed
//     image name as its message, query the registry through checkImageExists
//     and record the outcome in the ImageExistsChecked and ImageExists conditions.
//  2. Return with imageExists=true when the ImageExists condition is True for
//     the computed image name.
//  3. List the image builder Jobs labelled for this component. A Job whose hash
//     annotation differs from the current hash is deleted and the function
//     returns; when several Jobs match, all but the first are deleted.
//  4. When no Job remains, create one and return.
//  5. Otherwise translate the Job status into the ImageBuilding condition
//     (running/waiting, timed out, failed or succeeded). On success the
//     ImageExists condition is set to True and imageExists=true is returned.
//
// The named result "result" is never assigned in this function, so it is
// always the zero ctrl.Result.
//
//nolint:gocyclo,nakedret
func (r *DynamoComponentReconciler) ensureImageExists(ctx context.Context, opt ensureImageExistsOption) (DynamoComponent *nvidiacomv1alpha1.DynamoComponent, imageInfo ImageInfo, imageExists bool, result ctrl.Result, err error) { // nolint: unparam
	logs := log.FromContext(ctx)
	DynamoComponent = opt.DynamoComponent
	req := opt.req
	imageInfo = r.getImageInfo(GetImageInfoOption{
		DynamoComponent: DynamoComponent,
	})
	imageExistsCheckedCondition := meta.FindStatusCondition(DynamoComponent.Status.Conditions, nvidiacomv1alpha1.DynamoComponentConditionTypeImageExistsChecked)
	imageExistsCondition := meta.FindStatusCondition(DynamoComponent.Status.Conditions, nvidiacomv1alpha1.DynamoComponentConditionTypeImageExists)
	// Re-check the registry unless a successful check for exactly this image
	// name is already recorded.
	if imageExistsCheckedCondition == nil || imageExistsCheckedCondition.Status != metav1.ConditionTrue || imageExistsCheckedCondition.Message != imageInfo.ImageName {
		imageExistsCheckedCondition = &metav1.Condition{
			Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExistsChecked,
			Status:  metav1.ConditionUnknown,
			Reason:  "Reconciling",
			Message: imageInfo.ImageName,
		}
		dynamoComponentAvailableCondition := &metav1.Condition{
			Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeDynamoComponentAvailable,
			Status:  metav1.ConditionUnknown,
			Reason:  "Reconciling",
			Message: "Checking image exists",
		}
		DynamoComponent, err = r.setStatusConditions(ctx, req, *imageExistsCheckedCondition, *dynamoComponentAvailableCondition)
		if err != nil {
			return
		}
		r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "CheckingImage", "Checking image exists: %s", imageInfo.ImageName)
		imageExists, err = checkImageExists(DynamoComponent, imageInfo.ImageName)
		if err != nil {
			err = errors.Wrapf(err, "check image %s exists", imageInfo.ImageName)
			return
		}
		err = r.Get(ctx, req.NamespacedName, DynamoComponent)
		if err != nil {
			logs.Error(err, "Failed to re-fetch DynamoComponent")
			return
		}
		if imageExists {
			r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "CheckingImage", "Image exists: %s", imageInfo.ImageName)
			imageExistsCheckedCondition = &metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExistsChecked,
				Status:  metav1.ConditionTrue,
				Reason:  "Reconciling",
				Message: imageInfo.ImageName,
			}
			imageExistsCondition = &metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExists,
				Status:  metav1.ConditionTrue,
				Reason:  "Reconciling",
				Message: imageInfo.ImageName,
			}
			DynamoComponent, err = r.setStatusConditions(ctx, req, *imageExistsCondition, *imageExistsCheckedCondition)
			if err != nil {
				return
			}
		} else {
			r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "CheckingImage", "Image not exists: %s", imageInfo.ImageName)
			imageExistsCheckedCondition = &metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExistsChecked,
				Status:  metav1.ConditionFalse,
				Reason:  "Reconciling",
				Message: fmt.Sprintf("Image not exists: %s", imageInfo.ImageName),
			}
			imageExistsCondition = &metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExists,
				Status:  metav1.ConditionFalse,
				Reason:  "Reconciling",
				Message: fmt.Sprintf("Image %s is not exists", imageInfo.ImageName),
			}
			DynamoComponent, err = r.setStatusConditions(ctx, req, *imageExistsCondition, *imageExistsCheckedCondition)
			if err != nil {
				return
			}
		}
	}
	var DynamoComponentHashStr string
	DynamoComponentHashStr, err = r.getHashStr(DynamoComponent)
	if err != nil {
		err = errors.Wrapf(err, "get DynamoComponent %s/%s hash string", DynamoComponent.Namespace, DynamoComponent.Name)
		return
	}
	// The image counts as existing only when the ImageExists condition is True
	// for exactly the image name computed above.
	imageExists = imageExistsCondition != nil && imageExistsCondition.Status == metav1.ConditionTrue && imageExistsCondition.Message == imageInfo.ImageName
	if imageExists {
		return
	}
	// Find the image builder jobs that belong to this component.
	jobLabels := map[string]string{
		commonconsts.KubeLabelDynamoComponent:      DynamoComponent.Name,
		commonconsts.KubeLabelIsDynamoImageBuilder: commonconsts.KubeLabelValueTrue,
	}
	jobs := &batchv1.JobList{}
	err = r.List(ctx, jobs, client.InNamespace(req.Namespace), client.MatchingLabels(jobLabels))
	if err != nil {
		err = errors.Wrap(err, "list jobs")
		return
	}
	reservedJobs := make([]*batchv1.Job, 0)
	for _, job_ := range jobs.Items {
		// Per-iteration copy so that &job_ below refers to a distinct Job.
		job_ := job_
		oldHash := job_.Annotations[consts.KubeAnnotationDynamoComponentHash]
		if oldHash != DynamoComponentHashStr {
			logs.Info("Because hash changed, delete old job", "job", job_.Name, "oldHash", oldHash, "newHash", DynamoComponentHashStr)
			// --cascade=foreground
			err = r.Delete(ctx, &job_, &client.DeleteOptions{
				PropagationPolicy: &[]metav1.DeletionPropagation{metav1.DeletePropagationForeground}[0],
			})
			if err != nil {
				err = errors.Wrapf(err, "delete job %s", job_.Name)
				return
			}
			// Stop after deleting one stale job; imageExists is false here and
			// no replacement job is created in this call.
			return
		} else {
			reservedJobs = append(reservedJobs, &job_)
		}
	}
	var job *batchv1.Job
	if len(reservedJobs) > 0 {
		job = reservedJobs[0]
	}
	// Keep only the first matching job; delete the rest.
	if len(reservedJobs) > 1 {
		for _, job_ := range reservedJobs[1:] {
			logs.Info("Because has more than one job, delete old job", "job", job_.Name)
			// --cascade=foreground
			err = r.Delete(ctx, job_, &client.DeleteOptions{
				PropagationPolicy: &[]metav1.DeletionPropagation{metav1.DeletePropagationForeground}[0],
			})
			if err != nil {
				err = errors.Wrapf(err, "delete job %s", job_.Name)
				return
			}
		}
	}
	// No job yet: create one and return; its progress is evaluated on a later call.
	if job == nil {
		job, err = r.generateImageBuilderJob(ctx, GenerateImageBuilderJobOption{
			ImageInfo:       imageInfo,
			DynamoComponent: DynamoComponent,
		})
		if err != nil {
			err = errors.Wrap(err, "generate image builder job")
			return
		}
		r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderJob", "Creating image builder job: %s", job.Name)
		err = r.Create(ctx, job)
		if err != nil {
			err = errors.Wrapf(err, "create image builder job %s", job.Name)
			return
		}
		r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderJob", "Created image builder job: %s", job.Name)
		return
	}
	r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "CheckingImageBuilderJob", "Found image builder job: %s", job.Name)
	err = r.Get(ctx, req.NamespacedName, DynamoComponent)
	if err != nil {
		logs.Error(err, "Failed to re-fetch DynamoComponent")
		return
	}
	imageBuildingCondition := meta.FindStatusCondition(DynamoComponent.Status.Conditions, nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding)
	// Classify the job: it stays "running" unless it reached its completion
	// count (succeeded) or carries a JobFailed=True condition (failed). A job
	// without Spec.Completions is always treated as running.
	isJobFailed := false
	isJobRunning := true
	if job.Spec.Completions != nil {
		if job.Status.Succeeded != *job.Spec.Completions {
			if job.Status.Failed > 0 {
				for _, condition := range job.Status.Conditions {
					if condition.Type == batchv1.JobFailed && condition.Status == corev1.ConditionTrue {
						isJobFailed = true
						break
					}
				}
			}
			isJobRunning = !isJobFailed
		} else {
			isJobRunning = false
		}
	}
	if isJobRunning {
		conditions := make([]metav1.Condition, 0)
		if job.Status.Active > 0 {
			conditions = append(conditions, metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding,
				Status:  metav1.ConditionTrue,
				Reason:  "Reconciling",
				Message: fmt.Sprintf("Image building job %s is running", job.Name),
			})
		} else {
			conditions = append(conditions, metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding,
				Status:  metav1.ConditionUnknown,
				Reason:  "Reconciling",
				Message: fmt.Sprintf("Image building job %s is waiting", job.Name),
			})
		}
		if DynamoComponent.Spec.ImageBuildTimeout != nil {
			// The timeout is measured from the last transition of the
			// ImageBuilding condition.
			if imageBuildingCondition != nil && imageBuildingCondition.LastTransitionTime.Add(time.Duration(*DynamoComponent.Spec.ImageBuildTimeout)).Before(time.Now()) {
				// Appended after the running/waiting entry of the same type;
				// setStatusConditions applies conditions in order, so this one wins.
				conditions = append(conditions, metav1.Condition{
					Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding,
					Status:  metav1.ConditionFalse,
					Reason:  "Timeout",
					Message: fmt.Sprintf("Image building job %s is timeout", job.Name),
				})
				if _, err = r.setStatusConditions(ctx, req, conditions...); err != nil {
					return
				}
				err = errors.New("image build timeout")
				return
			}
		}
		if DynamoComponent, err = r.setStatusConditions(ctx, req, conditions...); err != nil {
			return
		}
		// Emit the event only when the previously stored condition was not
		// already True (isJobRunning is always true in this branch).
		if imageBuildingCondition != nil && imageBuildingCondition.Status != metav1.ConditionTrue && isJobRunning {
			r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "DynamoComponentImageBuilder", "Image is building now")
		}
		return
	}
	if isJobFailed {
		// A failed job is reported through the conditions only; err stays nil
		// and imageExists stays false.
		DynamoComponent, err = r.setStatusConditions(ctx, req,
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding,
				Status:  metav1.ConditionFalse,
				Reason:  "Reconciling",
				Message: fmt.Sprintf("Image building job %s is failed.", job.Name),
			},
			metav1.Condition{
				Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeDynamoComponentAvailable,
				Status:  metav1.ConditionFalse,
				Reason:  "Reconciling",
				Message: fmt.Sprintf("Image building job %s is failed.", job.Name),
			},
		)
		if err != nil {
			return
		}
		return
	}
	// The job reached its completion count: building is over and the image exists.
	DynamoComponent, err = r.setStatusConditions(ctx, req,
		metav1.Condition{
			Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageBuilding,
			Status:  metav1.ConditionFalse,
			Reason:  "Reconciling",
			Message: fmt.Sprintf("Image building job %s is succeeded.", job.Name),
		},
		metav1.Condition{
			Type:    nvidiacomv1alpha1.DynamoComponentConditionTypeImageExists,
			Status:  metav1.ConditionTrue,
			Reason:  "Reconciling",
			Message: imageInfo.ImageName,
		},
	)
	if err != nil {
		return
	}
	r.Recorder.Eventf(DynamoComponent, corev1.EventTypeNormal, "DynamoComponentImageBuilder", "Image has been built successfully")
	imageExists = true
	return
}
// setStatusConditions applies conditions to the status of the DynamoComponent
// named by req and returns the object as re-read after the update.
//
// Each attempt re-fetches the object, sets the conditions in the given order
// and writes the status. A failed status write is retried after a short pause,
// up to three attempts in total; this works around "the object has been
// modified; please apply your changes to the latest version and try again"
// errors. A failed fetch is not retried.
//
// The returned object is non-nil even when err is non-nil.
func (r *DynamoComponentReconciler) setStatusConditions(ctx context.Context, req ctrl.Request, conditions ...metav1.Condition) (DynamoComponent *nvidiacomv1alpha1.DynamoComponent, err error) {
	const (
		maxAttempts = 3
		retryPause  = 100 * time.Millisecond
	)
	DynamoComponent = new(nvidiacomv1alpha1.DynamoComponent)
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		err = r.Get(ctx, req.NamespacedName, DynamoComponent)
		if err != nil {
			return DynamoComponent, errors.Wrap(err, "Failed to re-fetch DynamoComponent")
		}
		for idx := range conditions {
			meta.SetStatusCondition(&DynamoComponent.Status.Conditions, conditions[idx])
		}
		err = r.Status().Update(ctx, DynamoComponent)
		if err == nil {
			break
		}
		// The pause also follows the final failed attempt.
		time.Sleep(retryPause)
	}
	if err != nil {
		return DynamoComponent, errors.Wrap(err, "Failed to update DynamoComponent status")
	}
	// Read the object back so the caller sees the stored state.
	err = r.Get(ctx, req.NamespacedName, DynamoComponent)
	if err != nil {
		return DynamoComponent, errors.Wrap(err, "Failed to re-fetch DynamoComponent")
	}
	return DynamoComponent, nil
}
// DynamoComponentImageBuildEngine names the engine used to build images.
type DynamoComponentImageBuildEngine string

// Known image build engine values.
const (
	// DynamoComponentImageBuildEngineKaniko is the value
	// getDynamoComponentImageBuildEngine returns when no engine is configured.
	DynamoComponentImageBuildEngineKaniko DynamoComponentImageBuildEngine = "kaniko"
	DynamoComponentImageBuildEngineBuildkit DynamoComponentImageBuildEngine = "buildkit"
	DynamoComponentImageBuildEngineBuildkitRootless DynamoComponentImageBuildEngine = "buildkit-rootless"
)

const (
	// EnvDynamoImageBuildEngine is the environment variable read by
	// getDynamoComponentImageBuildEngine to select the build engine.
	EnvDynamoImageBuildEngine = "DYNAMO_IMAGE_BUILD_ENGINE"
)
// getDynamoComponentImageBuildEngine returns the build engine named by the
// EnvDynamoImageBuildEngine environment variable, or the kaniko engine when
// the variable is unset or empty. The value is not validated against the
// known engines.
func getDynamoComponentImageBuildEngine() DynamoComponentImageBuildEngine {
	if configured := os.Getenv(EnvDynamoImageBuildEngine); configured != "" {
		return DynamoComponentImageBuildEngine(configured)
	}
	return DynamoComponentImageBuildEngineKaniko
}
// getApiStoreClient builds an API store client from the environment-backed
// API store configuration.
//
// It returns (nil, nil, nil) when no endpoint is configured, so callers must
// check the client for nil as well as the error. A not-found error from the
// config lookup is returned unwrapped; any other error is wrapped. An empty
// cluster name defaults to "default".
//
//nolint:nakedret
func (r *DynamoComponentReconciler) getApiStoreClient(ctx context.Context) (*apiStoreClient.ApiStoreClient, *commonconfig.ApiStoreConfig, error) {
	storeConf, err := commonconfig.GetApiStoreConfig(ctx)
	if err != nil {
		if k8serrors.IsNotFound(err) {
			return nil, nil, err
		}
		return nil, nil, errors.Wrap(err, "get api store config")
	}
	// No endpoint configured: report "no client" without an error.
	if storeConf.Endpoint == "" {
		return nil, nil, nil
	}
	if storeConf.ClusterName == "" {
		storeConf.ClusterName = "default"
	}
	storeClient := apiStoreClient.NewApiStoreClient(storeConf.Endpoint)
	return storeClient, storeConf, nil
}
// getDockerRegistry assembles the docker registry description used for a
// component image.
//
// The repository name comes from the registry configuration and falls back to
// "dynamo-components" when empty; the repository URI is the configured server
// (trailing slashes removed) joined with that name. A non-empty
// Spec.DockerConfigJSONSecretName on DynamoComponent overrides the configured
// secret name. DynamoComponent may be nil.
//
//nolint:nakedret
func (r *DynamoComponentReconciler) getDockerRegistry(DynamoComponent *nvidiacomv1alpha1.DynamoComponent) *schemas.DockerRegistrySchema {
	registryConf := commonconfig.GetDockerRegistryConfig()

	repoName := registryConf.DynamoComponentsRepositoryName
	if repoName == "" {
		repoName = "dynamo-components"
	}

	secretName := registryConf.SecretName
	if DynamoComponent != nil {
		if override := DynamoComponent.Spec.DockerConfigJSONSecretName; override != "" {
			secretName = override
		}
	}

	registry := &schemas.DockerRegistrySchema{}
	registry.Server = registryConf.Server
	registry.Secure = registryConf.Secure
	registry.DynamoRepositoryURI = fmt.Sprintf("%s/%s", strings.TrimRight(registryConf.Server, "/"), repoName)
	registry.SecretName = secretName
	return registry
}
// isAddNamespacePrefix reports whether the
// ADD_NAMESPACE_PREFIX_TO_IMAGE_NAME environment variable equals trueStr.
func isAddNamespacePrefix() bool {
	flag := os.Getenv("ADD_NAMESPACE_PREFIX_TO_IMAGE_NAME")
	return flag == trueStr
}
// getDynamoComponentImagePrefix returns the prefix, ending in ".", that is
// placed in front of the image tag body, or "" when no prefix applies.
//
// A non-empty storage-namespace annotation on the component takes precedence;
// otherwise the component's namespace is used when isAddNamespacePrefix
// reports true. A nil component yields "".
func getDynamoComponentImagePrefix(DynamoComponent *nvidiacomv1alpha1.DynamoComponent) string {
	if DynamoComponent == nil {
		return ""
	}
	// A missing annotation reads as "", which is treated the same as empty.
	if storageNS := DynamoComponent.Annotations[consts.KubeAnnotationDynamoComponentStorageNS]; storageNS != "" {
		return storageNS + "."
	}
	if !isAddNamespacePrefix() {
		return ""
	}
	return DynamoComponent.Namespace + "."
}
// getDynamoComponentImageName returns the full image reference for a component.
//
// When DynamoComponent is non-nil and sets Spec.Image, that value is returned
// unchanged. Otherwise the reference is "<DynamoRepositoryURI>:<tag>", where
// the tag is "dynamo.<prefix><repository>.<version>" (with ".esgz" appended
// when isEstargzEnabled reports true) and <prefix> comes from
// getDynamoComponentImagePrefix.
//
// Tags longer than 128 bytes are shortened in two steps: first the
// "<repository>.<version>" part is replaced by its hash; if that is still too
// long, prefix and tail are hashed together.
func getDynamoComponentImageName(DynamoComponent *nvidiacomv1alpha1.DynamoComponent, dockerRegistry schemas.DockerRegistrySchema, dynamoComponentRepositoryName, dynamoComponentVersion string) string {
	if DynamoComponent != nil && DynamoComponent.Spec.Image != "" {
		return DynamoComponent.Spec.Image
	}
	// Upper bound on the tag length enforced by this function.
	const maxTagLen = 128

	prefix := getDynamoComponentImagePrefix(DynamoComponent)
	tail := fmt.Sprintf("%s.%s", dynamoComponentRepositoryName, dynamoComponentVersion)
	if isEstargzEnabled() {
		tail += ".esgz"
	}
	tag := fmt.Sprintf("dynamo.%s%s", prefix, tail)
	if len(tag) > maxTagLen {
		tag = fmt.Sprintf("dynamo.%s%s", prefix, hash(tail))
		if len(tag) > maxTagLen {
			// hash returns a hex-encoded MD5 digest (32 characters), so this
			// tag is always 39 bytes long. The previous code sliced it with
			// [:128], which panics because the string is shorter than 128.
			tag = fmt.Sprintf("dynamo.%s", hash(prefix+tail))
		}
	}
	return fmt.Sprintf("%s:%s", dockerRegistry.DynamoRepositoryURI, tag)
}
// checkImageExists reports whether imageName can be found in its registry.
//
// It returns (false, nil) without contacting the registry when the component
// carries the "nvidia.com/force-build-image" annotation set to
// commonconsts.KubeLabelValueTrue. Otherwise it issues a HEAD request for the
// reference; an error whose text contains "404" is treated as "image does not
// exist", and any other error is returned wrapped.
func checkImageExists(DynamoComponent *nvidiacomv1alpha1.DynamoComponent, imageName string) (bool, error) {
	forceBuild := DynamoComponent.Annotations["nvidia.com/force-build-image"]
	if forceBuild == commonconsts.KubeLabelValueTrue {
		return false, nil
	}

	imageRef, parseErr := name.ParseReference(imageName)
	if parseErr != nil {
		return false, fmt.Errorf("parsing image reference: %w", parseErr)
	}

	registryAuth := authn.NewMultiKeychain(
		// This picks up auth from DOCKER_CONFIG env var
		authn.DefaultKeychain,
		// This picks up auth from GCR
		google.Keychain,
		// This picks up auth from ECR
		authn.NewKeychainFromHelper(ecr.NewECRHelper()),
		// This picks up auth from ACR
		authn.NewKeychainFromHelper(credhelper.NewACRCredentialsHelper()),
	)

	if _, headErr := remote.Head(imageRef, remote.WithAuthFromKeychain(registryAuth)); headErr != nil {
		// The missing-image case is detected by matching on the error text.
		if strings.Contains(headErr.Error(), "404") {
			return false, nil
		}
		return false, fmt.Errorf("checking image: %w", headErr)
	}
	return true, nil
}
// ImageInfo describes the image of a DynamoComponent and the registry it
// lives in, as computed by getImageInfo.
type ImageInfo struct {
	// DockerRegistry is the registry description returned by getDockerRegistry.
	DockerRegistry schemas.DockerRegistrySchema
	// DockerConfigJSONSecretName is a copy of DockerRegistry.SecretName.
	DockerConfigJSONSecretName string
	// ImageName is the full image reference from getDynamoComponentImageName.
	ImageName string
	// DockerRegistryInsecure is true when the component's
	// docker-registry-insecure annotation is exactly "true".
	DockerRegistryInsecure bool
}
// GetImageInfoOption bundles the inputs of getImageInfo.
type GetImageInfoOption struct {
	// DynamoComponent is the component to describe; getImageInfo dereferences
	// it without a nil check.
	DynamoComponent *nvidiacomv1alpha1.DynamoComponent
}
// getImageInfo computes the ImageInfo for opt.DynamoComponent.
//
// Spec.DynamoComponent is split at the first ":" into repository name and
// version, which feed getDynamoComponentImageName together with the registry
// from getDockerRegistry. opt.DynamoComponent must not be nil.
//
//nolint:nakedret
func (r *DynamoComponentReconciler) getImageInfo(opt GetImageInfoOption) ImageInfo {
	component := opt.DynamoComponent
	repoName, _, version := xstrings.Partition(component.Spec.DynamoComponent, ":")
	registry := r.getDockerRegistry(component)

	var info ImageInfo
	info.DockerRegistry = *registry
	info.ImageName = getDynamoComponentImageName(component, *registry, repoName, version)
	info.DockerConfigJSONSecretName = registry.SecretName
	info.DockerRegistryInsecure = component.Annotations[commonconsts.KubeAnnotationDynamoDockerRegistryInsecure] == "true"
	return info
}
// getImageBuilderJobName returns a new job name of the form
// "dynamo-image-builder-<id>", where <id> is a freshly generated xid.
func (r *DynamoComponentReconciler) getImageBuilderJobName() string {
	jobID := xid.New().String()
	return "dynamo-image-builder-" + jobID
}
// getImageBuilderJobLabels returns the labels placed on an image builder Job
// for DynamoComponent: the component name and the image-builder marker.
//
// The marker value uses commonconsts.KubeLabelValueTrue, the same constant
// ensureImageExists uses when it selects these jobs, so that the labels
// written here and the selector used for lookup cannot drift apart.
func (r *DynamoComponentReconciler) getImageBuilderJobLabels(DynamoComponent *nvidiacomv1alpha1.DynamoComponent) map[string]string {
	return map[string]string{
		commonconsts.KubeLabelDynamoComponent:      DynamoComponent.Name,
		commonconsts.KubeLabelIsDynamoImageBuilder: commonconsts.KubeLabelValueTrue,
	}
}
// getImageBuilderPodLabels returns the labels attached to the image builder
// pod template: the owning component's name and the image-builder marker set
// to "true". A new map is returned on every call, so callers may extend it.
func (r *DynamoComponentReconciler) getImageBuilderPodLabels(DynamoComponent *nvidiacomv1alpha1.DynamoComponent) map[string]string {
	podLabels := make(map[string]string, 2)
	podLabels[commonconsts.KubeLabelIsDynamoImageBuilder] = "true"
	podLabels[commonconsts.KubeLabelDynamoComponent] = DynamoComponent.Name
	return podLabels
}
// hash returns the lowercase hexadecimal MD5 digest of text.
// MD5 is used here as a fingerprint, not for security purposes.
func hash(text string) string {
	// nolint: gosec
	digest := md5.Sum([]byte(text))
	return hex.EncodeToString(digest[:])
}
// GenerateImageBuilderJobOption carries the inputs of generateImageBuilderJob.
// Its fields must stay identical to GenerateImageBuilderPodTemplateSpecOption,
// because generateImageBuilderJob converts one type directly into the other.
type GenerateImageBuilderJobOption struct {
	// ImageInfo holds the registry/image settings for the image to build.
	ImageInfo ImageInfo
	// DynamoComponent is the component the job is generated for; it becomes
	// the controller owner of the job.
	DynamoComponent *nvidiacomv1alpha1.DynamoComponent
}
// generateImageBuilderJob builds the batch Job that runs the image builder pod
// for opt.DynamoComponent.
//
// The job gets a freshly generated name, runs exactly one pod to completion,
// is garbage-collected 24 hours after finishing, and is failed immediately
// (no retries) when the builder container exits with BuilderJobFailedExitCode.
// The hash of the component is stored in the job's annotations and the
// component is set as the job's controller owner.
//
// On a controller-reference failure the partially built job is returned
// together with the error; on earlier failures the job is nil.
//
//nolint:nakedret
func (r *DynamoComponentReconciler) generateImageBuilderJob(ctx context.Context, opt GenerateImageBuilderJobOption) (job *batchv1.Job, err error) {
	component := opt.DynamoComponent

	// nolint: gosimple
	podTemplateSpec, err := r.generateImageBuilderPodTemplateSpec(ctx, GenerateImageBuilderPodTemplateSpecOption(opt))
	if err != nil {
		return nil, errors.Wrap(err, "generate image builder pod template spec")
	}

	hashStr, err := r.getHashStr(component)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get hash string")
	}

	// Fail the whole job as soon as the builder container reports the
	// dedicated failure exit code.
	failOnBuilderExitCode := batchv1.PodFailurePolicyRule{
		Action: batchv1.PodFailurePolicyActionFailJob,
		OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
			ContainerName: ptr.To(BuilderContainerName),
			Operator:      batchv1.PodFailurePolicyOnExitCodesOpIn,
			Values:        []int32{BuilderJobFailedExitCode},
		},
	}

	meta := metav1.ObjectMeta{
		Name:      r.getImageBuilderJobName(),
		Namespace: component.Namespace,
		Labels:    r.getImageBuilderJobLabels(component),
		Annotations: map[string]string{
			consts.KubeAnnotationDynamoComponentHash: hashStr,
		},
	}

	spec := batchv1.JobSpec{
		TTLSecondsAfterFinished: ptr.To(int32(60 * 60 * 24)),
		Completions:             ptr.To(int32(1)),
		Parallelism:             ptr.To(int32(1)),
		PodFailurePolicy: &batchv1.PodFailurePolicy{
			Rules: []batchv1.PodFailurePolicyRule{failOnBuilderExitCode},
		},
		Template: *podTemplateSpec,
	}

	job = &batchv1.Job{ObjectMeta: meta, Spec: spec}

	if err = ctrl.SetControllerReference(component, job, r.Scheme); err != nil {
		return job, errors.Wrapf(err, "set controller reference for job %s", job.Name)
	}
	return job, nil
}
// injectPodAffinity adds a preferred (weight 100) pod-affinity term to podSpec
// so that the pod is scheduled, when possible, on the same node (topology key
// corev1.LabelHostname) as other pods labelled with the given component's name.
//
// Any affinity already present on podSpec is preserved, but it is deep-copied
// before being extended: callers commonly assign podSpec.Affinity straight
// from a DynamoComponent spec or a shared configuration object, and appending
// in place would silently modify that source object (and anything derived
// from it afterwards, such as a spec hash).
func injectPodAffinity(podSpec *corev1.PodSpec, DynamoComponent *nvidiacomv1alpha1.DynamoComponent) {
	// DeepCopy is nil-safe and returns nil for a nil receiver.
	affinity := podSpec.Affinity.DeepCopy()
	if affinity == nil {
		affinity = &corev1.Affinity{}
	}
	if affinity.PodAffinity == nil {
		affinity.PodAffinity = &corev1.PodAffinity{}
	}

	preferSameComponent := corev1.WeightedPodAffinityTerm{
		Weight: 100,
		PodAffinityTerm: corev1.PodAffinityTerm{
			LabelSelector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					commonconsts.KubeLabelDynamoComponent: DynamoComponent.Name,
				},
			},
			TopologyKey: corev1.LabelHostname,
		},
	}
	affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
		affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution,
		preferSameComponent,
	)

	podSpec.Affinity = affinity
}
const (
	// BuilderContainerName is the name of the container that builds the image.
	BuilderContainerName = "builder"
	// BuilderJobFailedExitCode is the exit code the builder container uses to
	// signal a build failure.
	BuilderJobFailedExitCode = 42

	// ModelSeederContainerName is the name of the model seeder container.
	ModelSeederContainerName = "seeder"
	// ModelSeederJobFailedExitCode is the exit code associated with a failed
	// model seeder job.
	ModelSeederJobFailedExitCode = 42
)
// GenerateImageBuilderPodTemplateSpecOption carries the inputs of
// generateImageBuilderPodTemplateSpec. Its fields must stay identical to
// GenerateImageBuilderJobOption, which is converted directly into this type.
type GenerateImageBuilderPodTemplateSpecOption struct {
	// ImageInfo supplies the destination image name, the docker config secret
	// name and the insecure-registry flag used by the builder pod.
	ImageInfo ImageInfo
	// DynamoComponent is the component whose image is built; its spec provides
	// the download URL, build args and per-component pod overrides.
	DynamoComponent *nvidiacomv1alpha1.DynamoComponent
}
// generateImageBuilderPodTemplateSpec assembles the pod template used by the
// image builder job of opt.DynamoComponent.
//
// The resulting pod consists of:
//   - the "dynamocomponent-downloader" init container, which downloads the
//     component archive and extracts it into /workspace/buildcontext;
//   - an optional "gcp-init-docker-config" init container, added only when no
//     docker config secret is configured and the destination registry is a
//     Google registry, which writes a docker config.json using gcloud;
//   - the builder container (BuilderContainerName), which runs kaniko or
//     buildctl, depending on the configured build engine, to build and push
//     opt.ImageInfo.ImageName.
//
// Cluster-wide settings are read from the "dynamo-image-builder-config"
// ConfigMap when it exists; the corresponding fields of the DynamoComponent
// spec are then applied on top of them.
//
// The function uses named results with naked returns: every early return
// reports whatever is currently stored in err, and pod stays nil until the
// template has been constructed.
//
//nolint:gocyclo,nakedret
func (r *DynamoComponentReconciler) generateImageBuilderPodTemplateSpec(ctx context.Context, opt GenerateImageBuilderPodTemplateSpecOption) (pod *corev1.PodTemplateSpec, err error) {
	// Spec.DynamoComponent is partitioned on ":" into repository name and version.
	dynamoComponentRepositoryName, _, dynamoComponentVersion := xstrings.Partition(opt.DynamoComponent.Spec.DynamoComponent, ":")
	kubeLabels := r.getImageBuilderPodLabels(opt.DynamoComponent)
	imageName := opt.ImageInfo.ImageName
	dockerConfigJSONSecretName := opt.ImageInfo.DockerConfigJSONSecretName
	dockerRegistryInsecure := opt.ImageInfo.DockerRegistryInsecure
	// Scratch volumes shared between the init containers and the builder.
	volumes := []corev1.Volume{
		{
			Name: "dynamo",
			VolumeSource: corev1.VolumeSource{
				EmptyDir: &corev1.EmptyDirVolumeSource{},
			},
		},
		{
			Name: "workspace",
			VolumeSource: corev1.VolumeSource{
				EmptyDir: &corev1.EmptyDirVolumeSource{},
			},
		},
	}
	// Every container mounts the same three volumes. The docker config volume
	// referenced by the last mount is appended to volumes just below.
	volumeMounts := []corev1.VolumeMount{
		{
			Name: "dynamo",
			MountPath: "/dynamo",
		},
		{
			Name: "workspace",
			MountPath: "/workspace",
		},
		{
			Name: consts.DockerConfigVolumeName,
			MountPath: consts.DockerConfigVolumeMountPath,
		},
	}
	// Back the docker config volume with the registry secret when one is
	// configured (exposing its ".dockerconfigjson" key as "config.json");
	// otherwise use an emptyDir so the mount still resolves.
	if dockerConfigJSONSecretName != "" {
		volumes = append(volumes, corev1.Volume{
			Name: consts.DockerConfigVolumeName,
			VolumeSource: corev1.VolumeSource{
				Secret: &corev1.SecretVolumeSource{
					SecretName: dockerConfigJSONSecretName,
					Items: []corev1.KeyToPath{
						{
							Key: ".dockerconfigjson",
							Path: "config.json",
						},
					},
				},
			},
		})
	} else {
		volumes = append(volumes, corev1.Volume{
			Name: consts.DockerConfigVolumeName,
			VolumeSource: corev1.VolumeSource{
				EmptyDir: &corev1.EmptyDirVolumeSource{},
			},
		})
	}
	// Resolve the archive download URL. An explicit Spec.DownloadURL wins;
	// otherwise the api store is queried and either a presigned URL or the api
	// store's own download endpoint is used.
	var dynamoComponent *schemas.DynamoComponent
	dynamoComponentDownloadURL := opt.DynamoComponent.Spec.DownloadURL
	if dynamoComponentDownloadURL == "" {
		var apiStoreClient *apiStoreClient.ApiStoreClient
		var apiStoreConf *commonconfig.ApiStoreConfig
		apiStoreClient, apiStoreConf, err = r.getApiStoreClient(ctx)
		if err != nil {
			err = errors.Wrap(err, "get api store client")
			return
		}
		if apiStoreClient == nil || apiStoreConf == nil {
			err = errors.New("can't get api store client, please check api store configuration")
			return
		}
		r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting dynamoComponent %s from api store service", opt.DynamoComponent.Spec.DynamoComponent)
		dynamoComponent, err = apiStoreClient.GetDynamoComponent(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
		if err != nil {
			err = errors.Wrap(err, "get dynamoComponent")
			return
		}
		r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamoComponent %s from api store service", opt.DynamoComponent.Spec.DynamoComponent)
		if dynamoComponent.TransmissionStrategy != nil && *dynamoComponent.TransmissionStrategy == schemas.TransmissionStrategyPresignedURL {
			var dynamoComponent_ *schemas.DynamoComponent
			r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamoComponent %s from api store service", opt.DynamoComponent.Spec.DynamoComponent)
			dynamoComponent_, err = apiStoreClient.PresignDynamoComponentDownloadURL(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
			if err != nil {
				err = errors.Wrap(err, "presign dynamoComponent download url")
				return
			}
			r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamoComponent %s from api store service", opt.DynamoComponent.Spec.DynamoComponent)
			dynamoComponentDownloadURL = dynamoComponent_.PresignedDownloadUrl
		} else {
			dynamoComponentDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_components/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
		}
	}
	internalImages := commonconfig.GetInternalImages()
	logrus.Infof("Image builder is using the images %v", *internalImages)
	buildEngine := getDynamoComponentImageBuildEngine()
	// Shell script run by the downloader init container: it fetches the
	// archive (aws CLI for s3://, gsutil for gs://, curl otherwise) and unpacks
	// it into /workspace/buildcontext.
	// NOTE(review): the script uses `[[ ... ]]` but is launched with `sh -c`
	// below, so it relies on the downloader image's sh accepting that syntax,
	// and the URL is substituted into the script text verbatim — confirm both
	// are acceptable for the URLs and images in use.
	dynamoComponentDownloadCommandTemplate, err := template.New("downloadCommand").Parse(`
set -e
mkdir -p /workspace/buildcontext
url="{{.DynamoComponentDownloadURL}}"
echo "Downloading dynamoComponent {{.DynamoComponentRepositoryName}}:{{.DynamoComponentVersion}} to /tmp/downloaded.tar..."
if [[ ${url} == s3://* ]]; then
echo "Downloading from s3..."
aws s3 cp ${url} /tmp/downloaded.tar
elif [[ ${url} == gs://* ]]; then
echo "Downloading from GCS..."
gsutil cp ${url} /tmp/downloaded.tar
else
curl --fail -L ${url} --output /tmp/downloaded.tar --progress-bar
fi
cd /workspace/buildcontext
echo "Extracting dynamoComponent tar file..."
tar -xvf /tmp/downloaded.tar
echo "Removing dynamoComponent tar file..."
rm /tmp/downloaded.tar
echo "Done"
`)
	if err != nil {
		err = errors.Wrap(err, "failed to parse download command template")
		return
	}
	var dynamoComponentDownloadCommandBuffer bytes.Buffer
	err = dynamoComponentDownloadCommandTemplate.Execute(&dynamoComponentDownloadCommandBuffer, map[string]interface{}{
		"DynamoComponentDownloadURL": dynamoComponentDownloadURL,
		"DynamoComponentRepositoryName": dynamoComponentRepositoryName,
		"DynamoComponentVersion": dynamoComponentVersion,
	})
	if err != nil {
		err = errors.Wrap(err, "failed to execute download command template")
		return
	}
	dynamoComponentDownloadCommand := dynamoComponentDownloadCommandBuffer.String()
	// Fixed resources for the init containers (also reused by the GCP docker
	// config init container below).
	downloaderContainerResources := corev1.ResourceRequirements{
		Limits: corev1.ResourceList{
			corev1.ResourceCPU: resource.MustParse("1000m"),
			corev1.ResourceMemory: resource.MustParse("3000Mi"),
		},
		Requests: corev1.ResourceList{
			corev1.ResourceCPU: resource.MustParse("100m"),
			corev1.ResourceMemory: resource.MustParse("1000Mi"),
		},
	}
	downloaderContainerEnvFrom := opt.DynamoComponent.Spec.DownloaderContainerEnvFrom
	initContainers := []corev1.Container{
		{
			Name: "dynamocomponent-downloader",
			Image: internalImages.DynamoComponentsDownloader,
			Command: []string{
				"sh",
				"-c",
				dynamoComponentDownloadCommand,
			},
			VolumeMounts: volumeMounts,
			Resources: downloaderContainerResources,
			EnvFrom: downloaderContainerEnvFrom,
			Env: []corev1.EnvVar{
				{
					Name: "AWS_EC2_METADATA_DISABLED",
					Value: "true",
				},
			},
		},
	}
	if dockerConfigJSONSecretName == "" {
		// if no explicit docker config is provided, we need to provide the docker config to the image builder
		var ref name.Reference
		ref, err = name.ParseReference(imageName)
		if err != nil {
			err = errors.Wrap(err, "failed to parse reference")
			return
		}
		dockerRegistry := ref.Context().RegistryStr()
		if isGoogleRegistry(dockerRegistry) {
			// for GCP, we use the google cloud sdk to get the docker config.
			// The generated config.json is written into the docker config
			// volume (an emptyDir in this branch) using a gcloud access token.
			initContainers = append(initContainers, corev1.Container{
				Name: "gcp-init-docker-config",
				Image: "google/cloud-sdk:slim",
				Command: []string{
					"/bin/bash",
					"-c",
					fmt.Sprintf(`set -e
gcloud --quiet config get-value account
TOKEN=$(gcloud --quiet auth print-access-token)
cat > %s/config.json <<EOL
{
"auths": {
"%s": {
"auth": "$(echo -n "oauth2accesstoken:${TOKEN}" | base64 -w 0)"
}
}
}
EOL
echo 'Docker config.json created successfully'`, consts.DockerConfigVolumeMountPath, dockerRegistry),
				},
				Resources: downloaderContainerResources,
				EnvFrom: downloaderContainerEnvFrom,
				VolumeMounts: volumeMounts,
			})
		}
	}
	containers := make([]corev1.Container, 0)
	// Cluster-wide defaults, filled from the ConfigMap below when it exists.
	var globalExtraPodMetadata *dynamoCommon.ExtraPodMetadata
	var globalExtraPodSpec *dynamoCommon.ExtraPodSpec
	var globalExtraContainerEnv []corev1.EnvVar
	var globalDefaultImageBuilderContainerResources *corev1.ResourceRequirements
	var buildArgs []string
	var builderArgs []string
	configNamespace, err := commonconfig.GetDynamoImageBuilderNamespace(ctx)
	if err != nil {
		err = errors.Wrap(err, "failed to get dynamo image builder namespace")
		return
	}
	configCmName := "dynamo-image-builder-config"
	r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting configmap %s from namespace %s", configCmName, configNamespace)
	configCm := &corev1.ConfigMap{}
	err = r.Get(ctx, types.NamespacedName{Name: configCmName, Namespace: configNamespace}, configCm)
	configCmIsNotFound := k8serrors.IsNotFound(err)
	if err != nil && !configCmIsNotFound {
		err = errors.Wrap(err, "failed to get configmap")
		return
	}
	// A missing ConfigMap is tolerated. err is a named result, so it must be
	// cleared here or a later naked return would report the NotFound error.
	err = nil // nolint: ineffassign
	if !configCmIsNotFound {
		r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Configmap %s is got from namespace %s", configCmName, configNamespace)
		// Each ConfigMap key below holds a YAML document; absent keys leave
		// the corresponding value empty.
		globalExtraPodMetadata = &dynamoCommon.ExtraPodMetadata{}
		if val, ok := configCm.Data["extra_pod_metadata"]; ok {
			err = yaml.Unmarshal([]byte(val), globalExtraPodMetadata)
			if err != nil {
				err = errors.Wrapf(err, "failed to yaml unmarshal extra_pod_metadata, please check the configmap %s in namespace %s", configCmName, configNamespace)
				return
			}
		}
		globalExtraPodSpec = &dynamoCommon.ExtraPodSpec{}
		if val, ok := configCm.Data["extra_pod_spec"]; ok {
			err = yaml.Unmarshal([]byte(val), globalExtraPodSpec)
			if err != nil {
				err = errors.Wrapf(err, "failed to yaml unmarshal extra_pod_spec, please check the configmap %s in namespace %s", configCmName, configNamespace)
				return
			}
		}
		globalExtraContainerEnv = []corev1.EnvVar{}
		if val, ok := configCm.Data["extra_container_env"]; ok {
			err = yaml.Unmarshal([]byte(val), &globalExtraContainerEnv)
			if err != nil {
				err = errors.Wrapf(err, "failed to yaml unmarshal extra_container_env, please check the configmap %s in namespace %s", configCmName, configNamespace)
				return
			}
		}
		if val, ok := configCm.Data["default_image_builder_container_resources"]; ok {
			globalDefaultImageBuilderContainerResources = &corev1.ResourceRequirements{}
			err = yaml.Unmarshal([]byte(val), globalDefaultImageBuilderContainerResources)
			if err != nil {
				err = errors.Wrapf(err, "failed to yaml unmarshal default_image_builder_container_resources, please check the configmap %s in namespace %s", configCmName, configNamespace)
				return
			}
		}
		buildArgs = []string{}
		if val, ok := configCm.Data["build_args"]; ok {
			err = yaml.Unmarshal([]byte(val), &buildArgs)
			if err != nil {
				err = errors.Wrapf(err, "failed to yaml unmarshal build_args, please check the configmap %s in namespace %s", configCmName, configNamespace)
				return
			}
		}
		builderArgs = []string{}
		if val, ok := configCm.Data["builder_args"]; ok {
			err = yaml.Unmarshal([]byte(val), &builderArgs)
			if err != nil {
				err = errors.Wrapf(err, "failed to yaml unmarshal builder_args, please check the configmap %s in namespace %s", configCmName, configNamespace)
				return
			}
		}
		logrus.Info("passed in builder args: ", builderArgs)
	} else {
		r.Recorder.Eventf(opt.DynamoComponent, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Configmap %s is not found in namespace %s", configCmName, configNamespace)
	}
	// Build args from the component spec are appended after the global ones.
	if buildArgs == nil {
		buildArgs = make([]string, 0)
	}
	if opt.DynamoComponent.Spec.BuildArgs != nil {
		buildArgs = append(buildArgs, opt.DynamoComponent.Spec.BuildArgs...)
	}
	dockerFilePath := "/workspace/buildcontext/env/docker/Dockerfile"
	builderContainerEnvFrom := make([]corev1.EnvFromSource, 0)
	builderContainerEnvs := []corev1.EnvVar{
		{
			Name: "IFS",
			Value: "''",
		},
		{
			Name: "DOCKER_CONFIG",
			Value: consts.DockerConfigVolumeMountPath,
		},
	}
	// The kaniko cache repository defaults to the registry's repository URI
	// unless KANIKO_CACHE_REPO is set in the operator's environment.
	kanikoCacheRepo := os.Getenv("KANIKO_CACHE_REPO")
	if kanikoCacheRepo == "" {
		kanikoCacheRepo = opt.ImageInfo.DockerRegistry.DynamoRepositoryURI
	}
	// Propagate the component's image builder hash annotation to the pod.
	kubeAnnotations := make(map[string]string)
	kubeAnnotations[consts.KubeAnnotationDynamoComponentImageBuiderHash] = opt.DynamoComponent.Annotations[consts.KubeAnnotationDynamoComponentImageBuiderHash]
	// Default command line: kaniko. Both command and args are replaced
	// wholesale further down when a buildkit engine is selected.
	command := []string{
		"/kaniko/executor",
	}
	args := []string{
		"--context=/workspace/buildcontext",
		"--verbosity=info",
		"--image-fs-extract-retry=3",
		"--cache=false",
		fmt.Sprintf("--cache-repo=%s", kanikoCacheRepo),
		"--compressed-caching=false",
		"--compression=zstd",
		"--compression-level=-7",
		fmt.Sprintf("--dockerfile=%s", dockerFilePath),
		fmt.Sprintf("--insecure=%v", dockerRegistryInsecure),
		fmt.Sprintf("--destination=%s", imageName),
	}
	kanikoSnapshotMode := os.Getenv("KANIKO_SNAPSHOT_MODE")
	if kanikoSnapshotMode != "" {
		args = append(args, fmt.Sprintf("--snapshot-mode=%s", kanikoSnapshotMode))
	}
	// Pick the builder image for the configured engine; unknown engines are
	// rejected.
	var builderImage string
	switch buildEngine {
	case DynamoComponentImageBuildEngineKaniko:
		builderImage = internalImages.Kaniko
		if isEstargzEnabled() {
			builderContainerEnvs = append(builderContainerEnvs, corev1.EnvVar{
				Name: "GGCR_EXPERIMENT_ESTARGZ",
				Value: "1",
			})
		}
	case DynamoComponentImageBuildEngineBuildkit:
		builderImage = internalImages.Buildkit
	case DynamoComponentImageBuildEngineBuildkitRootless:
		builderImage = internalImages.BuildkitRootless
	default:
		err = errors.Errorf("unknown dynamoComponent image build engine %s", buildEngine)
		return
	}
	isBuildkit := buildEngine == DynamoComponentImageBuildEngineBuildkit || buildEngine == DynamoComponentImageBuildEngineBuildkitRootless
	if isBuildkit {
		// Buildkit: talk to the buildkit daemon at BUILDKIT_URL via buildctl
		// and push the image as part of the build output.
		output := fmt.Sprintf("type=image,name=%s,push=true,registry.insecure=%v", imageName, dockerRegistryInsecure)
		buildkitdFlags := []string{}
		if isEstargzEnabled() {
			buildkitdFlags = append(buildkitdFlags, "--oci-worker-snapshotter=stargz")
			output += ",oci-mediatypes=true,compression=estargz,force-compression=true"
		}
		if len(buildkitdFlags) > 0 {
			builderContainerEnvs = append(builderContainerEnvs, corev1.EnvVar{
				Name: "BUILDKITD_FLAGS",
				Value: strings.Join(buildkitdFlags, " "),
			})
		}
		buildkitURL := os.Getenv("BUILDKIT_URL")
		if buildkitURL == "" {
			err = errors.New("BUILDKIT_URL is not set")
			return
		}
		command = []string{
			"buildctl",
		}
		args = []string{
			"--addr",
			buildkitURL,
			"build",
			"--frontend",
			"dockerfile.v0",
			"--local",
			"context=/workspace/buildcontext",
			"--local",
			fmt.Sprintf("dockerfile=%s", filepath.Dir(dockerFilePath)),
			"--output",
			output,
		}
		// Registry-backed build cache is only used when BUILDKIT_CACHE_REPO is set.
		cacheRepo := os.Getenv("BUILDKIT_CACHE_REPO")
		if cacheRepo != "" {
			args = append(args, "--export-cache", fmt.Sprintf("type=registry,ref=%s:buildcache,mode=max,compression=zstd,ignore-error=true", cacheRepo))
			args = append(args, "--import-cache", fmt.Sprintf("type=registry,ref=%s:buildcache", cacheRepo))
		}
	}
	// add build args to pass via --build-arg
	// NOTE(review): each build arg is single-quoted here and the whole command
	// line is quoted again by shquot.POSIXShell below — confirm the builder
	// receives the intended value.
	for _, buildArg := range buildArgs {
		quotedBuildArg := unix.SingleQuote.Quote(buildArg)
		if isBuildkit {
			args = append(args, "--opt", fmt.Sprintf("build-arg:%s", quotedBuildArg))
		} else {
			args = append(args, fmt.Sprintf("--build-arg=%s", quotedBuildArg))
		}
	}
	// add other arguments to builder
	args = append(args, builderArgs...)
	logrus.Info("dynamo-image-builder args: ", args)
	// The builder runs through `sh -c` so that any failure is mapped to
	// BuilderJobFailedExitCode, the exit code matched by the job's pod failure
	// policy in generateImageBuilderJob.
	builderContainerArgs := []string{
		"-c",
		fmt.Sprintf("sleep 15; %s && exit 0 || exit %d", shquot.POSIXShell(append(command, args...)), BuilderJobFailedExitCode), // TODO: remove once functionality exists to wait for istio sidecar.
	}
	container := corev1.Container{
		Name: BuilderContainerName,
		Image: builderImage,
		ImagePullPolicy: corev1.PullAlways,
		Command: []string{"sh"},
		Args: builderContainerArgs,
		VolumeMounts: volumeMounts,
		Env: builderContainerEnvs,
		EnvFrom: builderContainerEnvFrom,
		TTY: true,
		Stdin: true,
	}
	if buildEngine == DynamoComponentImageBuildEngineKaniko {
		// we need to run as root when using kaniko
		container.SecurityContext = &corev1.SecurityContext{
			RunAsUser: ptr.To(int64(0)),
		}
	}
	// Resources: the global default applies first; a component-level setting
	// replaces it entirely.
	if globalDefaultImageBuilderContainerResources != nil {
		container.Resources = *globalDefaultImageBuilderContainerResources
	}
	if opt.DynamoComponent.Spec.ImageBuilderContainerResources != nil {
		container.Resources = *opt.DynamoComponent.Spec.ImageBuilderContainerResources
	}
	containers = append(containers, container)
	pod = &corev1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{
			Labels: kubeLabels,
			Annotations: kubeAnnotations,
		},
		Spec: corev1.PodSpec{
			RestartPolicy: corev1.RestartPolicyNever,
			Volumes: volumes,
			InitContainers: initContainers,
			Containers: containers,
			SecurityContext: &corev1.PodSecurityContext{
				RunAsUser: ptr.To(int64(1000)),
				RunAsGroup: ptr.To(int64(1000)),
				FSGroup: ptr.To(int64(1000)),
			},
		},
	}
	// Merge extra labels/annotations: global entries first, then component
	// entries, so the component wins on key collisions.
	if globalExtraPodMetadata != nil {
		for k, v := range globalExtraPodMetadata.Annotations {
			pod.Annotations[k] = v
		}
		for k, v := range globalExtraPodMetadata.Labels {
			pod.Labels[k] = v
		}
	}
	if opt.DynamoComponent.Spec.ImageBuilderExtraPodMetadata != nil {
		for k, v := range opt.DynamoComponent.Spec.ImageBuilderExtraPodMetadata.Annotations {
			pod.Annotations[k] = v
		}
		for k, v := range opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.Labels {
			pod.Labels[k] = v
		}
	}
	// Global pod spec settings are copied unconditionally (including empty
	// values); component-level settings below override only the fields they
	// actually set.
	if globalExtraPodSpec != nil {
		pod.Spec.PriorityClassName = globalExtraPodSpec.PriorityClassName
		pod.Spec.SchedulerName = globalExtraPodSpec.SchedulerName
		pod.Spec.NodeSelector = globalExtraPodSpec.NodeSelector
		pod.Spec.Affinity = globalExtraPodSpec.Affinity
		pod.Spec.Tolerations = globalExtraPodSpec.Tolerations
		pod.Spec.TopologySpreadConstraints = globalExtraPodSpec.TopologySpreadConstraints
		pod.Spec.ServiceAccountName = globalExtraPodSpec.ServiceAccountName
	}
	if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec != nil {
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.PriorityClassName != "" {
			pod.Spec.PriorityClassName = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.PriorityClassName
		}
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.SchedulerName != "" {
			pod.Spec.SchedulerName = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.SchedulerName
		}
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.NodeSelector != nil {
			pod.Spec.NodeSelector = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.NodeSelector
		}
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.Affinity != nil {
			pod.Spec.Affinity = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.Affinity
		}
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.Tolerations != nil {
			pod.Spec.Tolerations = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.Tolerations
		}
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.TopologySpreadConstraints != nil {
			pod.Spec.TopologySpreadConstraints = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.TopologySpreadConstraints
		}
		if opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.ServiceAccountName != "" {
			pod.Spec.ServiceAccountName = opt.DynamoComponent.Spec.ImageBuilderExtraPodSpec.ServiceAccountName
		}
	}
	// Adds a preferred pod-affinity term for pods of the same component.
	// NOTE(review): pod.Spec.Affinity may point at the Affinity object taken
	// from the component spec or the global config above, and
	// injectPodAffinity appends to it in place — confirm that mutating the
	// source object is intended.
	injectPodAffinity(&pod.Spec, opt.DynamoComponent)
	// Without an explicit service account, use the first one in the
	// component's namespace that carries the image-builder-pod label, falling
	// back to "default".
	if pod.Spec.ServiceAccountName == "" {
		serviceAccounts := &corev1.ServiceAccountList{}
		err = r.List(ctx, serviceAccounts, client.InNamespace(opt.DynamoComponent.Namespace), client.MatchingLabels{
			commonconsts.KubeLabelDynamoImageBuilderPod: commonconsts.KubeLabelValueTrue,
		})
		if err != nil {
			err = errors.Wrapf(err, "failed to list service accounts in namespace %s", opt.DynamoComponent.Namespace)
			return
		}
		if len(serviceAccounts.Items) > 0 {
			pod.Spec.ServiceAccountName = serviceAccounts.Items[0].Name
		} else {
			pod.Spec.ServiceAccountName = "default"
		}
	}
	// Append the extra environment variables (global first, then the
	// component's) to every init container and container.
	for i, c := range pod.Spec.InitContainers {
		env := c.Env
		if globalExtraContainerEnv != nil {
			env = append(env, globalExtraContainerEnv...)
		}
		env = append(env, opt.DynamoComponent.Spec.ImageBuilderExtraContainerEnv...)
		pod.Spec.InitContainers[i].Env = env
	}
	for i, c := range pod.Spec.Containers {
		env := c.Env
		if globalExtraContainerEnv != nil {
			env = append(env, globalExtraContainerEnv...)
		}
		env = append(env, opt.DynamoComponent.Spec.ImageBuilderExtraContainerEnv...)
		pod.Spec.Containers[i].Env = env
	}
	return
}
// getHashStr returns a decimal string fingerprint of the given DynamoComponent,
// computed with hashstructure (FormatV2) over its spec, labels and annotations.
// It returns an empty string and a wrapped error when hashing fails.
func (r *DynamoComponentReconciler) getHashStr(DynamoComponent *nvidiacomv1alpha1.DynamoComponent) (string, error) {
	// The input is deliberately kept as an anonymous struct with these exact
	// field names so the produced hash value stays the same.
	fingerprintInput := struct {
		Spec        nvidiacomv1alpha1.DynamoComponentSpec
		Labels      map[string]string
		Annotations map[string]string
	}{
		Spec:        DynamoComponent.Spec,
		Labels:      DynamoComponent.Labels,
		Annotations: DynamoComponent.Annotations,
	}

	sum, err := hashstructure.Hash(fingerprintInput, hashstructure.FormatV2, nil)
	if err != nil {
		return "", errors.Wrap(err, "get DynamoComponent CR spec hash")
	}
	return strconv.FormatUint(sum, 10), nil
}
// trueStr is the string form of boolean true used for label/annotation values.
const trueStr = "true"
// SetupWithManager sets up the controller with the Manager.
//
// The controller reconciles DynamoComponent objects on generation changes,
// also watches DynamoComponent and batch Job objects it owns, and applies the
// ephemeral-deployment event filter built from r.Config. Any setup error is
// wrapped; nil is returned on success.
func (r *DynamoComponentReconciler) SetupWithManager(mgr ctrl.Manager) error {
	onGenerationChange := builder.WithPredicates(predicate.GenerationChangedPredicate{})

	ctl := ctrl.NewControllerManagedBy(mgr)
	ctl = ctl.For(&nvidiacomv1alpha1.DynamoComponent{}, onGenerationChange)
	ctl = ctl.Owns(&nvidiacomv1alpha1.DynamoComponent{})
	ctl = ctl.Owns(&batchv1.Job{})
	ctl = ctl.WithEventFilter(controller_common.EphemeralDeploymentEventFilter(r.Config))

	setupErr := ctl.Complete(r)
	return errors.Wrap(setupErr, "failed to setup DynamoComponent controller")
}
......@@ -23,11 +23,11 @@ import (
"context"
"fmt"
"os"
"sort"
"strconv"
"strings"
"time"
"github.com/imdario/mergo"
appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
......@@ -41,7 +41,6 @@ import (
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
"github.com/huandu/xstrings"
istioNetworking "istio.io/api/networking/v1beta1"
networkingv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
......@@ -93,6 +92,7 @@ type DynamoComponentDeploymentReconciler struct {
EtcdAddr string
EtcdStorage etcdStorage
UseVirtualService bool
DockerSecretRetriever dockerSecretRetriever
}
// +kubebuilder:rbac:groups=nvidia.com,resources=dynamocomponentdeployments,verbs=get;list;watch;create;update;patch;delete
......@@ -197,50 +197,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
}
}()
// retrieve the dynamo component
dynamoComponentCR := &v1alpha1.DynamoComponent{}
err = r.Get(ctx, types.NamespacedName{Name: getK8sName(dynamoComponentDeployment.Spec.DynamoComponent), Namespace: dynamoComponentDeployment.Namespace}, dynamoComponentCR)
if err != nil {
logs.Error(err, "Failed to get DynamoComponent")
return
}
// check if the component is ready
if dynamoComponentCR.IsReady() {
logs.Info(fmt.Sprintf("DynamoComponent %s ready", dynamoComponentDeployment.Spec.DynamoComponent))
r.Recorder.Eventf(dynamoComponentDeployment, corev1.EventTypeNormal, "GetDynamoComponent", "DynamoComponent %s is ready", dynamoComponentDeployment.Spec.DynamoComponent)
dynamoComponentDeployment, err = r.setStatusConditions(ctx, req,
metav1.Condition{
Type: v1alpha1.DynamoGraphDeploymentConditionTypeDynamoComponentReady,
Status: metav1.ConditionTrue,
Reason: "Reconciling",
Message: "DynamoComponent is ready",
},
)
if err != nil {
return
}
} else {
logs.Info(fmt.Sprintf("DynamoComponent %s not ready", dynamoComponentDeployment.Spec.DynamoComponent))
r.Recorder.Eventf(dynamoComponentDeployment, corev1.EventTypeWarning, "GetDynamoComponent", "DynamoComponent %s is not ready", dynamoComponentDeployment.Spec.DynamoComponent)
_, err_ := r.setStatusConditions(ctx, req,
metav1.Condition{
Type: v1alpha1.DynamoGraphDeploymentConditionTypeDynamoComponentReady,
Status: metav1.ConditionFalse,
Reason: "Reconciling",
Message: "DynamoComponent not ready",
},
metav1.Condition{
Type: v1alpha1.DynamoGraphDeploymentConditionTypeAvailable,
Status: metav1.ConditionFalse,
Reason: "Reconciling",
Message: "DynamoComponent not ready",
},
)
err = err_
return
}
modified := false
// Reconcile PVC
......@@ -271,7 +227,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
modified_, _, err := commonController.SyncResource(ctx, r, dynamoComponentDeployment, func(ctx context.Context) (*volcanov1beta1.PodGroup, bool, error) {
return r.generateVolcanoPodGroup(ctx, generateResourceOption{
dynamoComponentDeployment: dynamoComponentDeployment,
dynamoComponent: dynamoComponentCR,
isStealingTrafficDebugModeEnabled: false,
containsStealingTrafficDebugModeEnabled: false,
instanceID: &i,
......@@ -289,7 +244,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
modified_, lwsObj, err := commonController.SyncResource(ctx, r, dynamoComponentDeployment, func(ctx context.Context) (*leaderworkersetv1.LeaderWorkerSet, bool, error) {
return r.generateLeaderWorkerSet(ctx, generateResourceOption{
dynamoComponentDeployment: dynamoComponentDeployment,
dynamoComponent: dynamoComponentCR,
isStealingTrafficDebugModeEnabled: false,
containsStealingTrafficDebugModeEnabled: false,
instanceID: &i,
......@@ -308,7 +262,7 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
}
// Clean up any excess LeaderWorkerSets (if replicas were decreased)
baseKubeName := r.getKubeName(dynamoComponentDeployment, dynamoComponentCR, false)
baseKubeName := r.getKubeName(dynamoComponentDeployment, false)
for i := int(desiredReplicas); ; i++ {
// Try to find a LeaderWorkerSet with the next index
nextLWSName := fmt.Sprintf("%s-%d", baseKubeName, i)
......@@ -356,7 +310,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
} else {
modified_, obj, err := r.createOrUpdateOrDeleteDeployments(ctx, generateResourceOption{
dynamoComponentDeployment: dynamoComponentDeployment,
dynamoComponent: dynamoComponentCR,
})
if err != nil {
......@@ -373,7 +326,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
modified_, _, err = commonController.SyncResource(ctx, r, dynamoComponentDeployment, func(ctx context.Context) (*autoscalingv2.HorizontalPodAutoscaler, bool, error) {
return r.generateHPA(generateResourceOption{
dynamoComponentDeployment: dynamoComponentDeployment,
dynamoComponent: dynamoComponentCR,
})
})
if err != nil {
......@@ -389,7 +341,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
// create or update api-server service
modified_, err := r.createOrUpdateOrDeleteServices(ctx, generateResourceOption{
dynamoComponentDeployment: dynamoComponentDeployment,
dynamoComponent: dynamoComponentCR,
})
if err != nil {
return
......@@ -402,7 +353,6 @@ func (r *DynamoComponentDeploymentReconciler) Reconcile(ctx context.Context, req
// create or update api-server ingresses
modified_, err = r.createOrUpdateOrDeleteIngress(ctx, generateResourceOption{
dynamoComponentDeployment: dynamoComponentDeployment,
dynamoComponent: dynamoComponentCR,
})
if err != nil {
return
......@@ -521,7 +471,7 @@ func (r *DynamoComponentDeploymentReconciler) generateVolcanoPodGroup(ctx contex
return nil, false, fmt.Errorf("generateVolcanoPodGroup: instanceID cannot be negative, got %d", instanceID)
}
podGroupName := r.getKubeName(opt.dynamoComponentDeployment, opt.dynamoComponent, opt.isStealingTrafficDebugModeEnabled)
podGroupName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
podGroupName = fmt.Sprintf("%s-%d", podGroupName, instanceID)
kubeNs := opt.dynamoComponentDeployment.Namespace
......@@ -664,11 +614,11 @@ func (r *DynamoComponentDeploymentReconciler) generateLeaderWorkerSet(ctx contex
return nil, false, fmt.Errorf("generateLeaderWorkerSet: instanceID cannot be negative, got %d", instanceID)
}
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.dynamoComponent, opt.isStealingTrafficDebugModeEnabled)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
kubeName = fmt.Sprintf("%s-%d", kubeName, instanceID)
kubeNs := opt.dynamoComponentDeployment.Namespace
labels := r.getKubeLabels(opt.dynamoComponentDeployment, opt.dynamoComponent)
labels := r.getKubeLabels(opt.dynamoComponentDeployment)
if labels == nil {
labels = make(map[string]string)
......@@ -885,7 +835,6 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteDeployments(
modified, depl, err = commonController.SyncResource(ctx, r, opt.dynamoComponentDeployment, func(ctx context.Context) (*appsv1.Deployment, bool, error) {
return r.generateDeployment(ctx, generateResourceOption{
dynamoComponentDeployment: opt.dynamoComponentDeployment,
dynamoComponent: opt.dynamoComponent,
isStealingTrafficDebugModeEnabled: false,
containsStealingTrafficDebugModeEnabled: containsStealingTrafficDebugModeEnabled,
})
......@@ -898,7 +847,6 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteDeployments(
modified2, _, err := commonController.SyncResource(ctx, r, opt.dynamoComponentDeployment, func(ctx context.Context) (*appsv1.Deployment, bool, error) {
return r.generateDeployment(ctx, generateResourceOption{
dynamoComponentDeployment: opt.dynamoComponentDeployment,
dynamoComponent: opt.dynamoComponent,
isStealingTrafficDebugModeEnabled: true,
containsStealingTrafficDebugModeEnabled: containsStealingTrafficDebugModeEnabled,
})
......@@ -956,7 +904,6 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteServices(ctx
modified, _, err = commonController.SyncResource(ctx, r, opt.dynamoComponentDeployment, func(ctx context.Context) (*corev1.Service, bool, error) {
return r.generateService(generateResourceOption{
dynamoComponentDeployment: opt.dynamoComponentDeployment,
dynamoComponent: opt.dynamoComponent,
isStealingTrafficDebugModeEnabled: false,
isDebugPodReceiveProductionTraffic: isDebugPodReceiveProductionTrafficEnabled,
containsStealingTrafficDebugModeEnabled: containsStealingTrafficDebugModeEnabled,
......@@ -971,7 +918,6 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteServices(ctx
modified_, _, err := commonController.SyncResource(ctx, r, opt.dynamoComponentDeployment, func(ctx context.Context) (*corev1.Service, bool, error) {
return r.generateService(generateResourceOption{
dynamoComponentDeployment: opt.dynamoComponentDeployment,
dynamoComponent: opt.dynamoComponent,
isStealingTrafficDebugModeEnabled: false,
isDebugPodReceiveProductionTraffic: isDebugPodReceiveProductionTrafficEnabled,
containsStealingTrafficDebugModeEnabled: containsStealingTrafficDebugModeEnabled,
......@@ -986,7 +932,6 @@ func (r *DynamoComponentDeploymentReconciler) createOrUpdateOrDeleteServices(ctx
modified_, _, err = commonController.SyncResource(ctx, r, opt.dynamoComponentDeployment, func(ctx context.Context) (*corev1.Service, bool, error) {
return r.generateService(generateResourceOption{
dynamoComponentDeployment: opt.dynamoComponentDeployment,
dynamoComponent: opt.dynamoComponent,
isStealingTrafficDebugModeEnabled: true,
isDebugPodReceiveProductionTraffic: isDebugPodReceiveProductionTrafficEnabled,
containsStealingTrafficDebugModeEnabled: containsStealingTrafficDebugModeEnabled,
......@@ -1122,14 +1067,14 @@ func (r *DynamoComponentDeploymentReconciler) generateVirtualService(ctx context
return vs, false, nil
}
// getKubeName returns the name used for the Kubernetes objects generated for
// dynamoComponentDeployment: the deployment's own Name, or that name with a
// "-d" suffix when debug is true.
// NOTE(review): two signatures for this method appear back to back here (one
// with an ignored *v1alpha1.DynamoComponent parameter, one without); they share
// the single body below.
func (r *DynamoComponentDeploymentReconciler) getKubeName(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, _ *v1alpha1.DynamoComponent, debug bool) string {
func (r *DynamoComponentDeploymentReconciler) getKubeName(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, debug bool) string {
if debug {
// Debug objects get a distinct "-d" suffixed name.
return fmt.Sprintf("%s-d", dynamoComponentDeployment.Name)
}
return dynamoComponentDeployment.Name
}
func (r *DynamoComponentDeploymentReconciler) getServiceName(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, _ *v1alpha1.DynamoComponent, debug bool) string {
func (r *DynamoComponentDeploymentReconciler) getServiceName(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, debug bool) string {
var kubeName string
if debug {
kubeName = fmt.Sprintf("%s-d", dynamoComponentDeployment.Name)
......@@ -1139,28 +1084,19 @@ func (r *DynamoComponentDeploymentReconciler) getServiceName(dynamoComponentDepl
return kubeName
}
// getGenericServiceName returns the name of the generic service for
// dynamoComponentDeployment. It delegates to getKubeName with debug set to
// false, so the result is the non-debug kube name.
// NOTE(review): two variants of this method are interleaved here; they differ
// only in whether a *v1alpha1.DynamoComponent is passed through to getKubeName.
func (r *DynamoComponentDeploymentReconciler) getGenericServiceName(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, dynamoComponent *v1alpha1.DynamoComponent) string {
return r.getKubeName(dynamoComponentDeployment, dynamoComponent, false)
func (r *DynamoComponentDeploymentReconciler) getGenericServiceName(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment) string {
return r.getKubeName(dynamoComponentDeployment, false)
}
// getKubeLabels returns the label map attached to the resources generated for
// dynamoComponentDeployment.
// NOTE(review): two variants of this method are interleaved in this span. One
// takes a *v1alpha1.DynamoComponent and builds a fresh map seeded with
// KubeLabelDynamoComponent; the other takes only the deployment and returns the
// deployment's Labels, or an empty map when the deployment or its Labels is nil.
// NOTE(review): the variant that returns dynamoComponentDeployment.Labels hands
// back the deployment's own map, not a copy. generatePodTemplateSpec writes a
// key into the returned map, so that write would also be visible on the
// deployment object — confirm this is intended or clone the map before
// returning.
func (r *DynamoComponentDeploymentReconciler) getKubeLabels(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, dynamoComponent *v1alpha1.DynamoComponent) map[string]string {
labels := map[string]string{
commonconsts.KubeLabelDynamoComponent: dynamoComponent.Name,
}
func (r *DynamoComponentDeploymentReconciler) getKubeLabels(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment) map[string]string {
if dynamoComponentDeployment != nil && dynamoComponentDeployment.Labels != nil {
if v, ok := dynamoComponentDeployment.Labels[commonconsts.KubeLabelDynamoComponent]; ok && v != "" {
labels[commonconsts.KubeLabelDynamoComponentType] = v
}
return dynamoComponentDeployment.Labels
}
return labels
return map[string]string{}
}
func (r *DynamoComponentDeploymentReconciler) getKubeAnnotations(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment, dynamoComponent *v1alpha1.DynamoComponent) map[string]string {
dynamoComponentRepositoryName, dynamoComponentVersion := getDynamoComponentRepositoryNameAndDynamoComponentVersion(dynamoComponent)
annotations := map[string]string{
commonconsts.KubeAnnotationDynamoRepository: dynamoComponentRepositoryName,
commonconsts.KubeAnnotationDynamoVersion: dynamoComponentVersion,
}
func (r *DynamoComponentDeploymentReconciler) getKubeAnnotations(dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment) map[string]string {
annotations := map[string]string{}
var extraAnnotations map[string]string
if dynamoComponentDeployment.Spec.ExtraPodMetadata != nil {
extraAnnotations = dynamoComponentDeployment.Spec.ExtraPodMetadata.Annotations
......@@ -1177,11 +1113,11 @@ func (r *DynamoComponentDeploymentReconciler) getKubeAnnotations(dynamoComponent
func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Context, opt generateResourceOption) (kubeDeployment *appsv1.Deployment, toDelete bool, err error) {
kubeNs := opt.dynamoComponentDeployment.Namespace
labels := r.getKubeLabels(opt.dynamoComponentDeployment, opt.dynamoComponent)
labels := r.getKubeLabels(opt.dynamoComponentDeployment)
annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment, opt.dynamoComponent)
annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.dynamoComponent, opt.isStealingTrafficDebugModeEnabled)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
kubeDeployment = &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
......@@ -1272,7 +1208,6 @@ func (r *DynamoComponentDeploymentReconciler) generateDeployment(ctx context.Con
type generateResourceOption struct {
dynamoComponentDeployment *v1alpha1.DynamoComponentDeployment
dynamoComponent *v1alpha1.DynamoComponent
isStealingTrafficDebugModeEnabled bool
containsStealingTrafficDebugModeEnabled bool
isDebugPodReceiveProductionTraffic bool
......@@ -1281,11 +1216,11 @@ type generateResourceOption struct {
}
func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOption) (*autoscalingv2.HorizontalPodAutoscaler, bool, error) {
labels := r.getKubeLabels(opt.dynamoComponentDeployment, opt.dynamoComponent)
labels := r.getKubeLabels(opt.dynamoComponentDeployment)
annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment, opt.dynamoComponent)
annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.dynamoComponent, false)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, false)
kubeNs := opt.dynamoComponentDeployment.Namespace
......@@ -1337,23 +1272,17 @@ func (r *DynamoComponentDeploymentReconciler) generateHPA(opt generateResourceOp
return kubeHpa, false, nil
}
// getDynamoComponentRepositoryNameAndDynamoComponentVersion splits the
// component reference stored in dynamoComponent.Spec.DynamoComponent around the
// ":" separator using xstrings.Partition. The part before the separator is
// returned as the repository name and the part after it as the version.
func getDynamoComponentRepositoryNameAndDynamoComponentVersion(dynamoComponent *v1alpha1.DynamoComponent) (repositoryName string, version string) {
	reference := dynamoComponent.Spec.DynamoComponent
	head, _, tail := xstrings.Partition(reference, ":")
	return head, tail
}
//nolint:gocyclo,nakedret
func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx context.Context, opt generateResourceOption) (podTemplateSpec *corev1.PodTemplateSpec, err error) {
logs := log.FromContext(ctx)
podLabels := r.getKubeLabels(opt.dynamoComponentDeployment, opt.dynamoComponent)
podLabels := r.getKubeLabels(opt.dynamoComponentDeployment)
if opt.isStealingTrafficDebugModeEnabled {
podLabels[commonconsts.KubeLabelDynamoDeploymentTargetType] = DeploymentTargetTypeDebug
}
podAnnotations := make(map[string]string)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.dynamoComponent, opt.isStealingTrafficDebugModeEnabled)
kubeName := r.getKubeName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
containerPort := commonconsts.DynamoServicePort
......@@ -1446,33 +1375,6 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
args = append(args, "--enable-system-app")
args = append(args, "--use-default-health-checks")
// todo : remove this line when https://github.com/ai-dynamo/dynamo/issues/345 is fixed
enableDependsOption := false
if len(opt.dynamoComponentDeployment.Spec.ExternalServices) > 0 && enableDependsOption {
serviceSuffix := fmt.Sprintf("%s.svc.cluster.local:%d", opt.dynamoComponentDeployment.Namespace, containerPort)
keys := make([]string, 0, len(opt.dynamoComponentDeployment.Spec.ExternalServices))
for key := range opt.dynamoComponentDeployment.Spec.ExternalServices {
keys = append(keys, key)
}
sort.Strings(keys)
for _, key := range keys {
service := opt.dynamoComponentDeployment.Spec.ExternalServices[key]
// Check if DeploymentSelectorKey is not "name"
if service.DeploymentSelectorKey == "name" {
dependsFlag := fmt.Sprintf("--depends \"%s=http://%s.%s\"", key, service.DeploymentSelectorValue, serviceSuffix)
args = append(args, dependsFlag)
} else if service.DeploymentSelectorKey == "dynamo" {
dependsFlag := fmt.Sprintf("--depends \"%s=dynamo://%s\"", key, service.DeploymentSelectorValue)
args = append(args, dependsFlag)
} else {
return nil, errors.Errorf("DeploymentSelectorKey '%s' not supported. Only 'name' and 'dynamo' are supported", service.DeploymentSelectorKey)
}
}
}
if opt.dynamoComponentDeployment.Spec.ServiceName != "" {
args = append(args, []string{"--service-name", opt.dynamoComponentDeployment.Spec.ServiceName}...)
args = append(args, opt.dynamoComponentDeployment.Spec.DynamoTag)
......@@ -1534,9 +1436,9 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
})
}
imageName := opt.dynamoComponent.GetImage()
imageName := opt.dynamoComponentDeployment.GetImage()
if imageName == "" {
return nil, errors.Errorf("image is not ready for component %s", opt.dynamoComponent.Name)
return nil, errors.Errorf("image is not set for component %s", opt.dynamoComponentDeployment.Name)
}
var securityContext *corev1.SecurityContext
......@@ -1660,7 +1562,7 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
container.SecurityContext.RunAsUser = &[]int64{0}[0]
}
// For now only overwrite the command and args.
// Merge extraPodSpecMainContainer into container, only overriding empty fields
if opt.dynamoComponentDeployment.Spec.ExtraPodSpec != nil {
extraPodSpecMainContainer := opt.dynamoComponentDeployment.Spec.ExtraPodSpec.MainContainer
if extraPodSpecMainContainer != nil {
......@@ -1679,6 +1581,12 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
container.Args = extraPodSpecMainContainer.Args
}
}
// finally, Merge non empty fields from extraPodSpecMainContainer into container, only overriding empty fields
err := mergo.Merge(&container, extraPodSpecMainContainer)
if err != nil {
err = errors.Wrapf(err, "failed to merge extraPodSpecMainContainer into container")
return nil, err
}
}
}
......@@ -1727,12 +1635,21 @@ func (r *DynamoComponentDeploymentReconciler) generatePodTemplateSpec(ctx contex
imagePullSecrets := []corev1.LocalObjectReference{}
if opt.dynamoComponent.Spec.DockerConfigJSONSecretName != "" {
if r.DockerSecretRetriever == nil {
err = errors.New("DockerSecretRetriever is not initialized")
return
}
secretsName, err := r.DockerSecretRetriever.GetSecrets(opt.dynamoComponentDeployment.Namespace, imageName)
if err != nil {
err = errors.Wrapf(err, "failed to get secrets for component %s and image %s", opt.dynamoComponentDeployment.Name, imageName)
return
}
for _, secretName := range secretsName {
imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{
Name: opt.dynamoComponent.Spec.DockerConfigJSONSecretName,
Name: secretName,
})
}
imagePullSecrets = append(imagePullSecrets, opt.dynamoComponent.Spec.ImagePullSecrets...)
if len(imagePullSecrets) > 0 {
podSpec.ImagePullSecrets = imagePullSecrets
......@@ -1901,9 +1818,9 @@ func getResourcesConfig(resources *dynamoCommon.Resources) (corev1.ResourceRequi
func (r *DynamoComponentDeploymentReconciler) generateService(opt generateResourceOption) (*corev1.Service, bool, error) {
var kubeName string
if opt.isGenericService {
kubeName = r.getGenericServiceName(opt.dynamoComponentDeployment, opt.dynamoComponent)
kubeName = r.getGenericServiceName(opt.dynamoComponentDeployment)
} else {
kubeName = r.getServiceName(opt.dynamoComponentDeployment, opt.dynamoComponent, opt.isStealingTrafficDebugModeEnabled)
kubeName = r.getServiceName(opt.dynamoComponentDeployment, opt.isStealingTrafficDebugModeEnabled)
}
kubeNs := opt.dynamoComponentDeployment.Namespace
......@@ -1920,7 +1837,7 @@ func (r *DynamoComponentDeploymentReconciler) generateService(opt generateResour
return kubeService, true, nil
}
labels := r.getKubeLabels(opt.dynamoComponentDeployment, opt.dynamoComponent)
labels := r.getKubeLabels(opt.dynamoComponentDeployment)
selector := make(map[string]string)
......@@ -1954,7 +1871,7 @@ func (r *DynamoComponentDeploymentReconciler) generateService(opt generateResour
},
}
annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment, opt.dynamoComponent)
annotations := r.getKubeAnnotations(opt.dynamoComponentDeployment)
kubeService.ObjectMeta.Annotations = annotations
kubeService.ObjectMeta.Labels = labels
......
......@@ -25,6 +25,7 @@ import (
"testing"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
......@@ -535,12 +536,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service1",
Namespace: "default",
},
},
instanceID: ptr.To(5),
},
},
......@@ -580,12 +575,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-missing-lws-size",
Namespace: "default",
},
},
instanceID: ptr.To(0),
},
},
......@@ -614,12 +603,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-invalid-lws-size-non-int",
Namespace: "default",
},
},
instanceID: ptr.To(1),
},
},
......@@ -648,12 +631,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-invalid-lws-size-zero",
Namespace: "default",
},
},
instanceID: ptr.To(2),
},
},
......@@ -682,12 +659,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-invalid-lws-size-negative",
Namespace: "default",
},
},
instanceID: ptr.To(3),
},
},
......@@ -716,12 +687,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-valid-lws-size-one",
Namespace: "default",
},
},
instanceID: ptr.To(4),
},
},
......@@ -750,12 +715,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-nil-instanceid",
Namespace: "default",
},
},
instanceID: nil,
},
},
......@@ -784,12 +743,6 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: "service-negative-instanceid",
Namespace: "default",
},
},
instanceID: ptr.To(-1),
},
},
......@@ -821,6 +774,14 @@ func TestDynamoComponentDeploymentReconciler_generateVolcanoPodGroup(t *testing.
}
}
// mockDockerSecretRetriever is a test double whose GetSecrets result is
// supplied by the test through GetSecretsFunc.
type mockDockerSecretRetriever struct {
	// GetSecretsFunc, when non-nil, produces the value returned by GetSecrets.
	GetSecretsFunc func(namespace, imageName string) ([]string, error)
}

// GetSecrets forwards to GetSecretsFunc and returns its result unchanged.
//
// It is safe to call on a nil receiver or with GetSecretsFunc unset; in both
// cases it reports no secrets and no error. The test table keeps a
// *mockDockerSecretRetriever in its fields and copies it into the reconciler
// unconditionally, so a case that leaves the field unset hands the reconciler a
// typed nil pointer. Without this guard such a case would panic as soon as the
// reconciler asked for secrets.
func (m *mockDockerSecretRetriever) GetSecrets(namespace, imageName string) ([]string, error) {
	if m == nil || m.GetSecretsFunc == nil {
		return nil, nil
	}
	return m.GetSecretsFunc(namespace, imageName)
}
func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.T) {
var limit = ptr.To(resource.MustParse("250Mi"))
limit.SetMilli(ptr.To(resource.MustParse("1Gi")).MilliValue() / 2)
......@@ -832,6 +793,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
EtcdAddr string
EtcdStorage etcdStorage
UseVirtualService bool
DockerSecretRetriever *mockDockerSecretRetriever
}
type args struct {
ctx context.Context
......@@ -853,6 +815,11 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
fields: fields{
Recorder: record.NewFakeRecorder(100),
Config: controller_common.Config{}, // Provide default or test-specific config
DockerSecretRetriever: &mockDockerSecretRetriever{
GetSecretsFunc: func(namespace, imageName string) ([]string, error) {
return []string{}, nil
},
},
},
args: args{
ctx: context.Background(),
......@@ -877,12 +844,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
GPU: "1",
},
},
ExtraPodSpec: &dynamoCommon.ExtraPodSpec{
MainContainer: &corev1.Container{
Image: "test-image:latest",
},
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{Name: "test-lws-component", Namespace: "default"},
Spec: v1alpha1.DynamoComponentSpec{Image: "test-image:latest"},
},
instanceID: ptr.To(0),
},
......@@ -904,7 +872,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Name: "test-lws-deploy-0",
Namespace: "default",
Labels: map[string]string{
commonconsts.KubeLabelDynamoComponent: "test-lws-component",
"instance-id": "0",
},
},
......@@ -918,7 +885,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Labels: map[string]string{
"instance-id": "0",
"role": "leader",
commonconsts.KubeLabelDynamoComponent: "test-lws-component",
},
Annotations: map[string]string{
"scheduling.k8s.io/group-name": "test-lws-deploy-0",
......@@ -971,7 +937,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
Labels: map[string]string{
"instance-id": "0",
"role": "worker",
commonconsts.KubeLabelDynamoComponent: "test-lws-component",
},
Annotations: map[string]string{
"scheduling.k8s.io/group-name": "test-lws-deploy-0",
......@@ -1027,12 +992,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
GPU: "1",
},
},
ExtraPodSpec: &dynamoCommon.ExtraPodSpec{
MainContainer: &corev1.Container{
Image: "test-image:latest",
},
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{Namespace: "default"},
Spec: v1alpha1.DynamoComponentSpec{Image: "test-image:latest"},
},
instanceID: nil,
},
......@@ -1067,12 +1033,13 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
GPU: "1",
},
},
ExtraPodSpec: &dynamoCommon.ExtraPodSpec{
MainContainer: &corev1.Container{
Image: "", // Image is missing, will cause error in generatePodTemplateSpec
},
},
},
},
dynamoComponent: &v1alpha1.DynamoComponent{ // Image is missing, will cause error in generatePodTemplateSpec
ObjectMeta: metav1.ObjectMeta{Name: "test-lws-component-leader-err", Namespace: "default"},
Spec: v1alpha1.DynamoComponentSpec{Image: ""},
},
instanceID: ptr.To(0),
},
......@@ -1115,9 +1082,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
if tt.args.opt.dynamoComponentDeployment != nil {
initialClientObjects = append(initialClientObjects, tt.args.opt.dynamoComponentDeployment)
}
if tt.args.opt.dynamoComponent != nil {
initialClientObjects = append(initialClientObjects, tt.args.opt.dynamoComponent)
}
if len(tt.args.mockServiceAccounts) > 0 {
initialClientObjects = append(initialClientObjects, tt.args.mockServiceAccounts...)
}
......@@ -1135,6 +1099,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
EtcdAddr: tt.fields.EtcdAddr,
EtcdStorage: tt.fields.EtcdStorage,
UseVirtualService: tt.fields.UseVirtualService,
DockerSecretRetriever: tt.fields.DockerSecretRetriever,
// Scheme: s, // Pass scheme if reconciler uses it directly, often client uses it
}
got, got1, err := r.generateLeaderWorkerSet(tt.args.ctx, tt.args.opt)
......
......@@ -117,18 +117,10 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
return ctrl.Result{}, nil
}
// fetch the dynamoGraphConfig
dynamoGraphConfig, err := dynamo.GetDynamoGraphConfig(ctx, dynamoDeployment, r.Recorder)
if err != nil {
logger.Error(err, "failed to get the DynamoGraphConfig")
reason = "failed_to_get_the_DynamoGraphConfig"
return ctrl.Result{}, err
}
// generate the dynamoComponentsDeployments from the config
dynamoComponentsDeployments, err := dynamo.GenerateDynamoComponentsDeployments(ctx, dynamoDeployment, dynamoGraphConfig, r.generateDefaultIngressSpec(dynamoDeployment))
dynamoComponentsDeployments, err := dynamo.GenerateDynamoComponentsDeployments(ctx, dynamoDeployment, r.generateDefaultIngressSpec(dynamoDeployment))
if err != nil {
logger.Error(err, "failed to generate the DynamoComponentsDeployments")
logger.Error(err, "failed to generate the DynamoComponentsDeployments and DynamoComponents")
reason = "failed_to_generate_the_DynamoComponentsDeployments"
return ctrl.Result{}, err
}
......@@ -140,33 +132,6 @@ func (r *DynamoGraphDeploymentReconciler) Reconcile(ctx context.Context, req ctr
}
}
// reconcile the dynamoComponent
// for now we use the same component for all the services and we differentiate them by the service name when launching the component
dynamoComponent := &nvidiacomv1alpha1.DynamoComponent{
ObjectMeta: metav1.ObjectMeta{
Name: getK8sName(dynamoDeployment.Spec.DynamoGraph),
Namespace: dynamoDeployment.Namespace,
},
Spec: nvidiacomv1alpha1.DynamoComponentSpec{
DynamoComponent: dynamoDeployment.Spec.DynamoGraph,
},
}
_, dynamoComponent, err = commonController.SyncResource(ctx, r, dynamoDeployment, func(ctx context.Context) (*nvidiacomv1alpha1.DynamoComponent, bool, error) {
return dynamoComponent, false, nil
})
if err != nil {
logger.Error(err, "failed to sync the DynamoComponent")
reason = "failed_to_sync_the_DynamoComponent"
return ctrl.Result{}, err
}
if !dynamoComponent.IsReady() {
logger.Info("The DynamoComponent is not ready")
reason = "dynamoComponent_is_not_ready"
message = "The DynamoComponent is not ready"
readyStatus = metav1.ConditionFalse
return ctrl.Result{}, nil
}
notReadyDeployments := []string{}
// reconcile the dynamoComponentsDeployments
for serviceName, dynamoComponentDeployment := range dynamoComponentsDeployments {
......@@ -247,13 +212,6 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
UpdateFunc: func(de event.UpdateEvent) bool { return true },
GenericFunc: func(ge event.GenericEvent) bool { return true },
})).
Owns(&nvidiacomv1alpha1.DynamoComponent{}, builder.WithPredicates(predicate.Funcs{
// ignore creation cause we don't want to be called again after we create the deployment
CreateFunc: func(ce event.CreateEvent) bool { return false },
DeleteFunc: func(de event.DeleteEvent) bool { return true },
UpdateFunc: func(de event.UpdateEvent) bool { return true },
GenericFunc: func(ge event.GenericEvent) bool { return true },
})).
WithEventFilter(commonController.EphemeralDeploymentEventFilter(r.Config)).
Complete(r)
}
......
......@@ -18,36 +18,16 @@
package dynamo
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"strconv"
"strings"
"dario.cat/mergo"
"emperror.dev/errors"
apiStoreClient "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/api_store_client"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/schemas"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
commonconfig "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/config"
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
"github.com/huandu/xstrings"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/log"
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/archive"
"github.com/goccy/go-yaml"
)
const (
// ComponentTypePlanner is the component type value that identifies a planner
// service when deployments are generated from a graph config.
ComponentTypePlanner = "planner"
// PlannerServiceAccountName is the service account name set on the
// ExtraPodSpec of planner components.
PlannerServiceAccountName = "planner-serviceaccount"
)
// ServiceConfig represents the YAML configuration structure for a service
......@@ -117,147 +97,12 @@ func GetDefaultDynamoNamespace(ctx context.Context, dynamoDeployment *v1alpha1.D
return fmt.Sprintf("dynamo-%s", dynamoDeployment.Name)
}
// RetrieveDynamoGraphDownloadURL resolves the URL from which the dynamo graph
// referenced by dynamoDeployment.Spec.DynamoGraph can be downloaded.
//
// The graph reference is split around ":" into a repository name and a version,
// and the component is looked up through the api store client. When the
// component's transmission strategy is the presigned-URL strategy, a presigned
// download URL is requested from the api store and returned. Otherwise the
// direct download endpoint is built from the configured api store endpoint.
// Progress is reported through recorder as normal events on dynamoDeployment.
//
// An error is returned when the api store client cannot be obtained or when
// either api store call fails.
func RetrieveDynamoGraphDownloadURL(ctx context.Context, dynamoDeployment *v1alpha1.DynamoGraphDeployment, recorder EventRecorder) (*string, error) {
	graph := dynamoDeployment.Spec.DynamoGraph
	repositoryName, _, version := xstrings.Partition(graph, ":")

	client, conf, err := GetApiStoreClient(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "get api store client")
	}
	if client == nil || conf == nil {
		return nil, errors.New("can't get api store client, please check api store configuration")
	}

	recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting dynamo graph %s from api store service", graph)
	component, err := client.GetDynamoComponent(ctx, repositoryName, version)
	if err != nil {
		return nil, errors.Wrap(err, "get dynamo component")
	}
	recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamo graph %s from api store service", graph)

	strategy := component.TransmissionStrategy
	if strategy == nil || *strategy != schemas.TransmissionStrategyPresignedURL {
		// No presigned URL requested: point at the api store's own download endpoint.
		direct := fmt.Sprintf("%s/api/v1/dynamo_components/%s/versions/%s/download", conf.Endpoint, repositoryName, version)
		return &direct, nil
	}

	recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamo graph %s from api store service", graph)
	presigned, err := client.PresignDynamoComponentDownloadURL(ctx, repositoryName, version)
	if err != nil {
		return nil, errors.Wrap(err, "presign dynamo component download url")
	}
	recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamo graph %s from api store service", graph)

	downloadURL := presigned.PresignedDownloadUrl
	return &downloadURL, nil
}
// DynamoGraphConfig represents the top-level YAML structure of a dynamoComponent yaml file stored in a dynamoComponent tar file
type DynamoGraphConfig struct {
// DynamoTag is read from the "service" key of the YAML document.
DynamoTag string `yaml:"service"`
// Services lists the per-service configurations found under "services".
Services []ServiceConfig `yaml:"services"`
// EntryService is the service name read from the "entry_service" key.
EntryService string `yaml:"entry_service"`
}
// EventRecorder is the event-reporting dependency accepted by the helpers in
// this package; it only requires the Eventf method.
type EventRecorder interface {
// Eventf records an event of the given type and reason against obj. Callers in
// this file pass message as a format string with args as its operands.
Eventf(obj runtime.Object, eventtype string, reason string, message string, args ...interface{})
}
// RetrieveDynamoGraphConfigurationFile downloads the tar archive at url and
// returns the contents of the "dynamo.yaml" entry inside it.
//
// The request is bound to ctx, so cancelling ctx or exceeding its deadline
// aborts the download. A response with a non-2xx status is reported as an error
// instead of being handed to the tar extractor. The URL is deliberately left
// out of that error because it may be a presigned URL.
//
// Errors from building or sending the request, reading the body, or extracting
// the file are returned to the caller unchanged.
func RetrieveDynamoGraphConfigurationFile(ctx context.Context, url string) (*bytes.Buffer, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer func() {
		// A failed Close cannot change the result; log it for diagnosis only.
		if err := resp.Body.Close(); err != nil {
			logger := log.FromContext(ctx)
			logger.Error(err, "error closing response body")
		}
	}()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, fmt.Errorf("download dynamo graph archive: unexpected HTTP status %s", resp.Status)
	}

	// Read the tar file into memory
	tarData, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}

	// Extract the YAML file
	yamlFileName := "dynamo.yaml"
	yamlContent, err := archive.ExtractFileFromTar(tarData, yamlFileName)
	if err != nil {
		return nil, err
	}
	return yamlContent, nil
}
// GetApiStoreClient loads the api store configuration and builds a client for
// the configured endpoint.
//
// It fails when the configuration cannot be found, when loading it returns any
// other error, or when the configured endpoint is empty. An empty cluster name
// in the configuration is replaced with "default" before the configuration is
// returned alongside the client.
func GetApiStoreClient(ctx context.Context) (*apiStoreClient.ApiStoreClient, *commonconfig.ApiStoreConfig, error) {
	conf, err := commonconfig.GetApiStoreConfig(ctx)
	switch {
	case k8serrors.IsNotFound(err):
		return nil, nil, errors.New("endpoint config not found")
	case err != nil:
		return nil, nil, errors.Wrap(err, "get api store config")
	case conf.Endpoint == "":
		return nil, nil, errors.New("endpoint is empty")
	}

	if conf.ClusterName == "" {
		conf.ClusterName = "default"
	}
	return apiStoreClient.NewApiStoreClient(conf.Endpoint), conf, nil
}
// ParseDynamoGraphConfig decodes yamlContent into a DynamoGraphConfig.
//
// The raw YAML is written to the context logger before decoding. The returned
// pointer is always non-nil, including when the decode error is non-nil;
// callers must check the error before relying on the config.
func ParseDynamoGraphConfig(ctx context.Context, yamlContent *bytes.Buffer) (*DynamoGraphConfig, error) {
	log.FromContext(ctx).Info("trying to parse dynamo graph config", "yamlContent", yamlContent.String())

	parsed := &DynamoGraphConfig{}
	decodeErr := yaml.Unmarshal(yamlContent.Bytes(), parsed)
	return parsed, decodeErr
}
// ParseDynDeploymentConfig decodes jsonContent into a DynDeploymentConfig.
//
// The value decoded so far is returned together with any json.Unmarshal error.
// ctx is accepted but not used by the decoding.
func ParseDynDeploymentConfig(ctx context.Context, jsonContent []byte) (DynDeploymentConfig, error) {
	var parsed DynDeploymentConfig
	if decodeErr := json.Unmarshal(jsonContent, &parsed); decodeErr != nil {
		return parsed, decodeErr
	}
	return parsed, nil
}
// GetDynamoGraphConfig fetches and parses the graph configuration for
// dynamoDeployment in three steps: resolve the download URL, download the
// archive and extract its YAML configuration, then parse that YAML. The first
// failing step's error is returned unchanged.
func GetDynamoGraphConfig(ctx context.Context, dynamoDeployment *v1alpha1.DynamoGraphDeployment, recorder EventRecorder) (*DynamoGraphConfig, error) {
	// Step 1: where does the graph archive live?
	downloadURL, err := RetrieveDynamoGraphDownloadURL(ctx, dynamoDeployment, recorder)
	if err != nil {
		return nil, err
	}

	// Step 2: download the archive and pull out the YAML configuration.
	rawConfig, err := RetrieveDynamoGraphConfigurationFile(ctx, *downloadURL)
	if err != nil {
		return nil, err
	}

	// Step 3: decode the YAML.
	return ParseDynamoGraphConfig(ctx, rawConfig)
}
func SetLwsAnnotations(serviceArgs *ServiceArgs, deployment *v1alpha1.DynamoComponentDeployment) error {
if serviceArgs.Resources != nil &&
serviceArgs.Resources.GPU != nil && *serviceArgs.Resources.GPU != "" && *serviceArgs.Resources.GPU != "0" &&
......@@ -284,104 +129,39 @@ func SetLwsAnnotations(serviceArgs *ServiceArgs, deployment *v1alpha1.DynamoComp
}
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphDeployment *v1alpha1.DynamoGraphDeployment, config *DynamoGraphConfig, ingressSpec *v1alpha1.IngressSpec) (map[string]*v1alpha1.DynamoComponentDeployment, error) {
dynamoServices := make(map[string]string)
func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphDeployment *v1alpha1.DynamoGraphDeployment, ingressSpec *v1alpha1.IngressSpec) (map[string]*v1alpha1.DynamoComponentDeployment, error) {
deployments := make(map[string]*v1alpha1.DynamoComponentDeployment)
graphDynamoNamespace := ""
for _, service := range config.Services {
for componentName, component := range parentDynamoGraphDeployment.Spec.Services {
deployment := &v1alpha1.DynamoComponentDeployment{}
deployment.Name = fmt.Sprintf("%s-%s", parentDynamoGraphDeployment.Name, strings.ToLower(service.Name))
deployment.Spec.DynamoComponentDeploymentSharedSpec = component.DynamoComponentDeploymentSharedSpec
deployment.Name = getDynamoComponentName(parentDynamoGraphDeployment, componentName)
deployment.Namespace = parentDynamoGraphDeployment.Namespace
deployment.Spec.DynamoTag = config.DynamoTag
deployment.Spec.DynamoComponent = parentDynamoGraphDeployment.Spec.DynamoGraph
deployment.Spec.ServiceName = service.Name
deployment.Spec.Replicas = service.Config.Workers
deployment.Spec.ServiceName = componentName
dynamoNamespace := GetDefaultDynamoNamespace(ctx, parentDynamoGraphDeployment)
if component.DynamoNamespace != nil && *component.DynamoNamespace != "" {
dynamoNamespace = *component.DynamoNamespace
}
if graphDynamoNamespace != "" && graphDynamoNamespace != dynamoNamespace {
return nil, fmt.Errorf("namespace mismatch for component %s: graph uses namespace %s but component specifies %s", componentName, graphDynamoNamespace, dynamoNamespace)
}
graphDynamoNamespace = dynamoNamespace
deployment.Spec.DynamoNamespace = &dynamoNamespace
labels := make(map[string]string)
// add the labels in the spec in order to label all sub-resources
deployment.Spec.Labels = labels
// and add the labels to the deployment itself
deployment.Labels = labels
labels[commonconsts.KubeLabelDynamoComponent] = service.Name
if service.Config.Dynamo != nil && service.Config.Dynamo.Enabled {
dynamoNamespace := service.Config.Dynamo.Namespace
if dynamoNamespace == "" {
// if no namespace is specified, use the default namespace
dynamoNamespace = GetDefaultDynamoNamespace(ctx, parentDynamoGraphDeployment)
}
deployment.Spec.DynamoNamespace = &dynamoNamespace
dynamoServices[service.Name] = fmt.Sprintf("%s/%s", service.Config.Dynamo.Name, dynamoNamespace)
labels[commonconsts.KubeLabelDynamoComponent] = componentName
labels[commonconsts.KubeLabelDynamoNamespace] = dynamoNamespace
// we check that all dynamo components are in the same namespace
// this is needed for the planner to work correctly
// this check will be removed when the global planner will be implemented
if graphDynamoNamespace != "" && graphDynamoNamespace != dynamoNamespace {
return nil, fmt.Errorf("different namespaces for the same graph, expected %s, got %s", graphDynamoNamespace, dynamoNamespace)
}
graphDynamoNamespace = dynamoNamespace
if service.Config.Dynamo.ComponentType == ComponentTypePlanner {
deployment.Spec.ExtraPodSpec = &common.ExtraPodSpec{
ServiceAccountName: PlannerServiceAccountName,
}
if component.ComponentType == commonconsts.ComponentTypePlanner {
if deployment.Spec.ExtraPodSpec == nil {
deployment.Spec.ExtraPodSpec = &common.ExtraPodSpec{}
}
deployment.Spec.ExtraPodSpec.ServiceAccountName = commonconsts.PlannerServiceAccountName
}
// Check http_exposed independently
if config.EntryService == service.Name && service.Config.HttpExposed {
if deployment.IsMainComponent() && ingressSpec != nil {
deployment.Spec.Ingress = *ingressSpec
// TODO (maybe): add paths to IngressSpec
}
if service.Config.Resources != nil {
deployment.Spec.Resources = &common.Resources{
Requests: &common.ResourceItem{
Custom: service.Config.Resources.Custom,
},
Limits: &common.ResourceItem{
Custom: service.Config.Resources.Custom,
},
}
if service.Config.Resources.CPU != nil {
deployment.Spec.Resources.Requests.CPU = *service.Config.Resources.CPU
deployment.Spec.Resources.Limits.CPU = *service.Config.Resources.CPU
}
if service.Config.Resources.Memory != nil {
deployment.Spec.Resources.Requests.Memory = *service.Config.Resources.Memory
deployment.Spec.Resources.Limits.Memory = *service.Config.Resources.Memory
}
if service.Config.Resources.GPU != nil {
deployment.Spec.Resources.Requests.GPU = *service.Config.Resources.GPU
deployment.Spec.Resources.Limits.GPU = *service.Config.Resources.GPU
}
serviceArgs := ServiceArgs{
Resources: service.Config.Resources,
TotalGpus: service.Config.TotalGpus,
Workers: service.Config.Workers,
}
if err := SetLwsAnnotations(&serviceArgs, deployment); err != nil {
return nil, err
}
}
deployment.Spec.Autoscaling = &v1alpha1.Autoscaling{
Enabled: false,
}
if service.Config.Autoscaling != nil {
deployment.Spec.Autoscaling.Enabled = true
deployment.Spec.Autoscaling.MinReplicas = service.Config.Autoscaling.MinReplicas
deployment.Spec.Autoscaling.MaxReplicas = service.Config.Autoscaling.MaxReplicas
}
// Override properties from the ExtraPodSpec (i.e. command and args) if provided.
if err := mergeExtraPodSpec(deployment, &service.Config); err != nil {
return nil, err
}
// override the component config with the component config that is in the parent deployment
if configOverride, ok := parentDynamoGraphDeployment.Spec.Services[service.Name]; ok {
err := mergo.Merge(&deployment.Spec.DynamoComponentDeploymentSharedSpec, configOverride.DynamoComponentDeploymentSharedSpec, mergo.WithOverride)
if err != nil {
return nil, err
}
}
// merge the envs from the parent deployment with the envs from the service
if len(parentDynamoGraphDeployment.Spec.Envs) > 0 {
......@@ -397,37 +177,10 @@ func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoGraphD
}
// we only override the replicas if it is not set in the CRD.
// replicas, if set in the CRD must always be the source of truth.
if parentSpec, ok := parentDynamoGraphDeployment.Spec.Services[service.Name]; ok {
if parentSpec.DynamoComponentDeploymentSharedSpec.Replicas != nil {
deployment.Spec.Replicas = parentSpec.DynamoComponentDeploymentSharedSpec.Replicas
}
}
deployments[service.Name] = deployment
}
for _, service := range config.Services {
deployment := deployments[service.Name]
// generate external services
for _, dependency := range service.Dependencies {
dependentServiceName := dependency["service"]
if deployment.Spec.ExternalServices == nil {
deployment.Spec.ExternalServices = make(map[string]v1alpha1.ExternalService)
}
dependencyDeployment := deployments[dependentServiceName]
if dependencyDeployment == nil {
return nil, fmt.Errorf("dependency %s not found", dependentServiceName)
}
if dynamoService, ok := dynamoServices[dependentServiceName]; ok {
deployment.Spec.ExternalServices[dependentServiceName] = v1alpha1.ExternalService{
DeploymentSelectorKey: "dynamo",
DeploymentSelectorValue: dynamoService,
}
} else {
deployment.Spec.ExternalServices[dependentServiceName] = v1alpha1.ExternalService{
DeploymentSelectorKey: "name",
DeploymentSelectorValue: dependentServiceName,
}
}
if component.Replicas != nil {
deployment.Spec.Replicas = component.Replicas
}
deployments[componentName] = deployment
}
return deployments, nil
}
......@@ -536,21 +289,6 @@ func mergeEnvs(common, specific []corev1.EnvVar) []corev1.EnvVar {
return merged
}
// mergeExtraPodSpec merges the ExtraPodSpec from service config into the deployment spec.
//
// The merge only runs when serviceConfig.ExtraPodSpec and its MainContainer are
// both non-nil; in every other case the deployment is left untouched and nil is
// returned. An error reported by mergo.Merge is returned to the caller as-is.
func mergeExtraPodSpec(deployment *v1alpha1.DynamoComponentDeployment, serviceConfig *Config) error {
// The guard checks MainContainer, but the merge below covers the whole ExtraPodSpec.
if serviceConfig.ExtraPodSpec != nil && serviceConfig.ExtraPodSpec.MainContainer != nil {
// Allocate the destination on demand so the merge has a non-nil target.
if deployment.Spec.DynamoComponentDeploymentSharedSpec.ExtraPodSpec == nil {
deployment.Spec.DynamoComponentDeploymentSharedSpec.ExtraPodSpec = new(common.ExtraPodSpec)
}
// NOTE(review): WithOverride together with WithOverwriteWithEmptyValue presumably
// lets the service config win even for fields it leaves empty - confirm against
// the mergo documentation.
err := mergo.Merge(
deployment.Spec.DynamoComponentDeploymentSharedSpec.ExtraPodSpec,
serviceConfig.ExtraPodSpec,
mergo.WithOverride,
mergo.WithOverwriteWithEmptyValue,
)
if err != nil {
return err
}
}
return nil
// getDynamoComponentName returns the name used for a component's deployment:
// the graph deployment's name, a dash, and the component name in lower case.
func getDynamoComponentName(dynamoDeployment *v1alpha1.DynamoGraphDeployment, component string) string {
	loweredComponent := strings.ToLower(component)
	return fmt.Sprintf("%s-%s", dynamoDeployment.Name, loweredComponent)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment