"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "7d5d6f8c086ac2fc3094cb05d240e8dc71ad4f7d"
Commit 183941fa authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

feat: deploy and use buildkit to build dynamo images (#450)

parent 5fe1f8a2
...@@ -70,7 +70,7 @@ dynamo-operator: ...@@ -70,7 +70,7 @@ dynamo-operator:
secure: true secure: true
bentoRepositoryName: yatai-bentos bentoRepositoryName: yatai-bentos
bentoImageBuildEngine: kaniko bentoImageBuildEngine: buildkit
addNamespacePrefixToImageName: false addNamespacePrefixToImageName: false
estargz: estargz:
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# buildkitd daemon configuration. The chart's ConfigMap template embeds this
# file via .Files.Get "buildkitd.toml", and the buildkitd container mounts it
# read-only under /home/user/.config/buildkit.

# Daemon-wide debug switch, left enabled here.
debug = true
# Build-history retention limits.
[history]
# Presumably expressed in seconds (345600 s = 4 days) -- TODO confirm the unit
# against the buildkitd.toml reference.
maxAge = 345600
# Presumably the maximum number of history entries retained -- TODO confirm.
maxEntries = 1000
# Settings for the OCI worker.
[worker.oci]
enabled = true
# Garbage collection is switched on with an overall keep-storage figure of 1000GB.
# NOTE(review): the buildkitd StatefulSet in this chart requests a 1000Gi cache
# volume; confirm this figure leaves enough headroom on that volume.
gc = true
gckeepstorage = "1000GB"
# GC policy entry 1: narrowed by `filters` to local sources, exec cache mounts
# and git checkouts; keepDuration 168h, keepBytes 200GB.
[[worker.oci.gcpolicy]]
keepBytes = "200GB"
keepDuration = "168h" # 7 days
filters = [ "type==source.local", "type==exec.cachemount", "type==source.git.checkout"]
# GC policy entry 2: no filters, `all` explicitly false; keepDuration 336h,
# keepBytes 300GB.
[[worker.oci.gcpolicy]]
all = false
keepDuration = "336h" # 14 days
keepBytes = "300GB"
# GC policy entry 3: `all` set to true and no keepDuration; keepBytes 500GB.
[[worker.oci.gcpolicy]]
all = true
keepBytes = "500GB"
# Registry settings for docker.io; the mirror list is explicitly empty.
[registry."docker.io"]
mirrors = []
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .Values.dynamo.bentoImageBuildEngine | eq "buildkit" }}
---
# Everything in this template is rendered only when the chart value
# dynamo.bentoImageBuildEngine equals "buildkit".
# NOTE(review): the values file also lists "buildkit-rootless" as an option;
# that value does not render these objects -- confirm this is intended.
#
# StatefulSet: a single-replica, rootless buildkitd daemon that listens on a
# unix socket and on TCP port 1234, with its cache on a per-pod volume claim.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
  name: {{ include "dynamo-operator.fullname" . }}-buildkitd
spec:
  serviceName: {{ include "dynamo-operator.fullname" . }}-buildkitd
  podManagementPolicy: Parallel
  updateStrategy:
    type: RollingUpdate
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
      annotations:
        container.apparmor.security.beta.kubernetes.io/buildkitd: unconfined
    # see buildkit/docs/rootless.md for caveats of rootless mode
    spec:
      containers:
        - name: buildkitd
          image: moby/buildkit:v0.20.0-rootless
          args:
            - --oci-worker-no-process-sandbox
            - --addr
            - unix:///run/user/1000/buildkit/buildkitd.sock
            - --addr
            - tcp://0.0.0.0:1234
          resources:
            requests:
              cpu: 3
              memory: 8Gi
            limits:
              cpu: 8
              memory: 30Gi
          # Readiness and liveness both run the same buildctl command inside
          # the container.
          readinessProbe:
            exec:
              command:
                - buildctl
                - debug
                - workers
            initialDelaySeconds: 5
            periodSeconds: 30
          livenessProbe:
            exec:
              command:
                - buildctl
                - debug
                - workers
            initialDelaySeconds: 5
            periodSeconds: 30
          # Container-level security context.
          securityContext:
            seccompProfile:
              type: Unconfined
            # To change UID/GID, you need to rebuild the image
            runAsUser: 1000
            runAsGroup: 1000
          volumeMounts:
            - mountPath: /home/user/.local/share/buildkit
              name: cache
            - mountPath: /home/user/.config/buildkit
              name: config
              readOnly: true
            - mountPath: /dev/shm
              name: dshm
      # Pod-level security context (fsGroup is a pod-level field).
      securityContext:
        fsGroup: 1000
      volumes:
        - name: config
          configMap:
            name: {{ include "dynamo-operator.fullname" . }}-buildkitd
            items:
              - key: buildkitd.toml
                path: buildkitd.toml
        - name: dshm
          emptyDir:
            medium: Memory
  # The "cache" volume mounted above comes from this per-pod claim.
  volumeClaimTemplates:
    - metadata:
        name: cache
      spec:
        accessModes:
          - ReadWriteOnce
        storageClassName: local-path
        resources:
          requests:
            storage: 1000Gi
---
# Headless Service (clusterIP: None) selecting the buildkitd pods and exposing
# TCP port 1234.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-buildkitd
  labels:
    app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
spec:
  ports:
    - name: http
      port: 1234
      targetPort: 1234
      protocol: TCP
  clusterIP: None
  selector:
    app.kubernetes.io/name: {{ include "dynamo-operator.fullname" . }}-buildkitd
---
# ConfigMap carrying the chart file buildkitd.toml; the StatefulSet mounts it
# as the "config" volume.
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "dynamo-operator.fullname" . }}-buildkitd
data:
  buildkitd.toml: |
    {{- .Files.Get "buildkitd.toml" | nindent 4 }}
{{- end }}
\ No newline at end of file
...@@ -59,6 +59,7 @@ stringData: ...@@ -59,6 +59,7 @@ stringData:
INTERNAL_IMAGES_KANIKO: {{ .Values.dynamo.internalImages.kaniko | quote }} INTERNAL_IMAGES_KANIKO: {{ .Values.dynamo.internalImages.kaniko | quote }}
INTERNAL_IMAGES_BUILDKIT: {{ .Values.dynamo.internalImages.buildkit | quote }} INTERNAL_IMAGES_BUILDKIT: {{ .Values.dynamo.internalImages.buildkit | quote }}
INTERNAL_IMAGES_BUILDKIT_ROOTLESS: {{ .Values.dynamo.internalImages.buildkitRootless | quote }} INTERNAL_IMAGES_BUILDKIT_ROOTLESS: {{ .Values.dynamo.internalImages.buildkitRootless | quote }}
BUILDKIT_URL: tcp://{{ include "dynamo-operator.fullname" . }}-buildkitd:1234
BENTO_IMAGE_BUILD_ENGINE: {{ .Values.dynamo.bentoImageBuildEngine | quote }} BENTO_IMAGE_BUILD_ENGINE: {{ .Values.dynamo.bentoImageBuildEngine | quote }}
......
...@@ -88,7 +88,7 @@ dynamo: ...@@ -88,7 +88,7 @@ dynamo:
internalImages: internalImages:
bentoDownloader: quay.io/bentoml/bento-downloader:0.0.5 bentoDownloader: quay.io/bentoml/bento-downloader:0.0.5
kaniko: quay.io/bentoml/kaniko:debug kaniko: quay.io/bentoml/kaniko:debug
buildkit: quay.io/bentoml/buildkit:master buildkit: moby/buildkit:latest
buildkitRootless: quay.io/bentoml/buildkit:master-rootless buildkitRootless: quay.io/bentoml/buildkit:master-rootless
metricsTransformer: quay.io/bentoml/yatai-bento-metrics-transformer:0.0.4 metricsTransformer: quay.io/bentoml/yatai-bento-metrics-transformer:0.0.4
debugger: quay.io/bentoml/bento-debugger:0.0.8 debugger: quay.io/bentoml/bento-debugger:0.0.8
...@@ -110,7 +110,7 @@ dynamo: ...@@ -110,7 +110,7 @@ dynamo:
secure: true secure: true
bentoRepositoryName: yatai-bentos bentoRepositoryName: yatai-bentos
bentoImageBuildEngine: kaniko # options: kaniko, buildkit, buildkit-rootless bentoImageBuildEngine: buildkit # options: kaniko, buildkit, buildkit-rootless
addNamespacePrefixToImageName: false addNamespacePrefixToImageName: false
estargz: estargz:
......
...@@ -18,8 +18,6 @@ ...@@ -18,8 +18,6 @@
package controller package controller
import ( import (
"strings"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
...@@ -78,7 +76,3 @@ func getPvcName(crd metav1.Object, defaultName *string) string { ...@@ -78,7 +76,3 @@ func getPvcName(crd metav1.Object, defaultName *string) string {
} }
return crd.GetName() return crd.GetName()
} }
// generateDynamoNimRequestName returns the part of tag that precedes its first
// ':' separator. A tag containing no ':' is returned unchanged.
func generateDynamoNimRequestName(tag string) string {
	name, _, _ := strings.Cut(tag, ":")
	return name
}
...@@ -19,6 +19,7 @@ package controller ...@@ -19,6 +19,7 @@ package controller
import ( import (
"context" "context"
"strings"
"dario.cat/mergo" "dario.cat/mergo"
"emperror.dev/errors" "emperror.dev/errors"
...@@ -133,7 +134,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -133,7 +134,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
// reconcile the dynamoNimRequest // reconcile the dynamoNimRequest
dynamoNimRequest := &nvidiacomv1alpha1.DynamoNimRequest{ dynamoNimRequest := &nvidiacomv1alpha1.DynamoNimRequest{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
Name: generateDynamoNimRequestName(dynamoDeployment.Spec.DynamoNim), Name: strings.ReplaceAll(dynamoDeployment.Spec.DynamoNim, ":", "--"),
Namespace: dynamoDeployment.Namespace, Namespace: dynamoDeployment.Namespace,
}, },
Spec: nvidiacomv1alpha1.DynamoNimRequestSpec{ Spec: nvidiacomv1alpha1.DynamoNimRequestSpec{
......
...@@ -1784,7 +1784,7 @@ monitoring.options.insecure=true` ...@@ -1784,7 +1784,7 @@ monitoring.options.insecure=true`
args := make([]string, 0) args := make([]string, 0)
args = append(args, "uv", "run", "dynamo", "start") args = append(args, "cd", "src", "&&", "uv", "run", "dynamo", "start")
// todo : remove this line when https://github.com/ai-dynamo/dynamo/issues/345 is fixed // todo : remove this line when https://github.com/ai-dynamo/dynamo/issues/345 is fixed
enableDependsOption := false enableDependsOption := false
...@@ -1815,7 +1815,7 @@ monitoring.options.insecure=true` ...@@ -1815,7 +1815,7 @@ monitoring.options.insecure=true`
if opt.dynamoNimDeployment.Spec.ServiceName != "" { if opt.dynamoNimDeployment.Spec.ServiceName != "" {
args = append(args, []string{"--service-name", opt.dynamoNimDeployment.Spec.ServiceName}...) args = append(args, []string{"--service-name", opt.dynamoNimDeployment.Spec.ServiceName}...)
args = append(args, "src."+opt.dynamoNimDeployment.Spec.DynamoTag) args = append(args, opt.dynamoNimDeployment.Spec.DynamoTag)
} }
yataiResources := opt.dynamoNimDeployment.Spec.Resources yataiResources := opt.dynamoNimDeployment.Spec.Resources
......
...@@ -2530,8 +2530,17 @@ echo "Done" ...@@ -2530,8 +2530,17 @@ echo "Done"
Value: strings.Join(buildkitdFlags, " "), Value: strings.Join(buildkitdFlags, " "),
}) })
} }
command = []string{"buildctl-daemonless.sh"} buildkitURL := os.Getenv("BUILDKIT_URL")
if buildkitURL == "" {
err = errors.New("BUILDKIT_URL is not set")
return
}
command = []string{
"buildctl",
}
args = []string{ args = []string{
"--addr",
buildkitURL,
"build", "build",
"--frontend", "--frontend",
"dockerfile.v0", "dockerfile.v0",
...@@ -2543,11 +2552,10 @@ echo "Done" ...@@ -2543,11 +2552,10 @@ echo "Done"
output, output,
} }
cacheRepo := os.Getenv("BUILDKIT_CACHE_REPO") cacheRepo := os.Getenv("BUILDKIT_CACHE_REPO")
if cacheRepo == "" { if cacheRepo != "" {
cacheRepo = opt.ImageInfo.DockerRegistry.BentosRepositoryURIInCluster args = append(args, "--export-cache", fmt.Sprintf("type=registry,ref=%s:buildcache,mode=max,compression=zstd,ignore-error=true", cacheRepo))
args = append(args, "--import-cache", fmt.Sprintf("type=registry,ref=%s:buildcache", cacheRepo))
} }
args = append(args, "--export-cache", fmt.Sprintf("type=registry,ref=%s:buildcache,mode=max,compression=zstd,ignore-error=true", cacheRepo))
args = append(args, "--import-cache", fmt.Sprintf("type=registry,ref=%s:buildcache", cacheRepo))
} }
var builderContainerSecurityContext *corev1.SecurityContext var builderContainerSecurityContext *corev1.SecurityContext
......
...@@ -258,7 +258,7 @@ func GenerateDynamoNIMDeployments(parentDynamoDeployment *v1alpha1.DynamoDeploym ...@@ -258,7 +258,7 @@ func GenerateDynamoNIMDeployments(parentDynamoDeployment *v1alpha1.DynamoDeploym
deployment.Name = fmt.Sprintf("%s-%s", parentDynamoDeployment.Name, strings.ToLower(service.Name)) deployment.Name = fmt.Sprintf("%s-%s", parentDynamoDeployment.Name, strings.ToLower(service.Name))
deployment.Namespace = parentDynamoDeployment.Namespace deployment.Namespace = parentDynamoDeployment.Namespace
deployment.Spec.DynamoTag = config.DynamoTag deployment.Spec.DynamoTag = config.DynamoTag
deployment.Spec.DynamoNim = strings.Split(parentDynamoDeployment.Spec.DynamoNim, ":")[0] deployment.Spec.DynamoNim = strings.ReplaceAll(parentDynamoDeployment.Spec.DynamoNim, ":", "--")
deployment.Spec.ServiceName = service.Name deployment.Spec.ServiceName = service.Name
if service.Config.Dynamo != nil && service.Config.Dynamo.Enabled { if service.Config.Dynamo != nil && service.Config.Dynamo.Enabled {
dynamoServices[service.Name] = fmt.Sprintf("%s/%s", service.Config.Dynamo.Name, service.Config.Dynamo.Namespace) dynamoServices[service.Name] = fmt.Sprintf("%s/%s", service.Config.Dynamo.Name, service.Config.Dynamo.Namespace)
......
...@@ -92,7 +92,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -92,7 +92,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default", Namespace: "default",
}, },
Spec: v1alpha1.DynamoNimDeploymentSpec{ Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim", DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService1", DynamoTag: "dynamonim:MyService1",
ServiceName: "service1", ServiceName: "service1",
Resources: &compounaiCommon.Resources{ Resources: &compounaiCommon.Resources{
...@@ -127,7 +127,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -127,7 +127,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default", Namespace: "default",
}, },
Spec: v1alpha1.DynamoNimDeploymentSpec{ Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim", DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService1", DynamoTag: "dynamonim:MyService1",
ServiceName: "service2", ServiceName: "service2",
}, },
...@@ -188,7 +188,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -188,7 +188,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default", Namespace: "default",
}, },
Spec: v1alpha1.DynamoNimDeploymentSpec{ Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim", DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService2", DynamoTag: "dynamonim:MyService2",
ServiceName: "service1", ServiceName: "service1",
Resources: &compounaiCommon.Resources{ Resources: &compounaiCommon.Resources{
...@@ -227,7 +227,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) { ...@@ -227,7 +227,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
Namespace: "default", Namespace: "default",
}, },
Spec: v1alpha1.DynamoNimDeploymentSpec{ Spec: v1alpha1.DynamoNimDeploymentSpec{
DynamoNim: "dynamonim", DynamoNim: "dynamonim--ac4e234",
DynamoTag: "dynamonim:MyService2", DynamoTag: "dynamonim:MyService2",
ServiceName: "service2", ServiceName: "service2",
}, },
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment