Unverified Commit f11ea8f7 authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

feat: remove bento/yatai references (#782)

parent ea84ab11
......@@ -48,14 +48,6 @@ Create chart name and version as used by the chart label.
{{ include "dynamo-operator.fullname" . }}-dynamo-env
{{- end }}
{{/*
Generate k8s robot token
*/}}
{{- define "dynamo-operator.yataiApiToken" -}}
{{- $secretObj := (lookup "v1" "Secret" .Release.Namespace (include "dynamo-operator.dynamo.envname" .)) | default dict }}
{{- $secretData := (get $secretObj "data") | default dict }}
{{- (get $secretData "YATAI_API_TOKEN") | default (randAlphaNum 16 | nospace | b64enc) | b64dec }}
{{- end -}}
{{/*
Common labels
......
......@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- if .Values.dynamo.bentoImageBuildEngine | eq "buildkit" }}
{{- if .Values.dynamo.imageBuildEngine | eq "buildkit" }}
---
apiVersion: apps/v1
kind: StatefulSet
......
......@@ -15,7 +15,7 @@
apiVersion: v1
kind: Secret
metadata:
name: yatai-regcred
name: dynamo-regcred
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
type: kubernetes.io/dockerconfigjson
......
......@@ -21,23 +21,17 @@ metadata:
{{- include "dynamo-operator.labels" . | nindent 4 }}
type: Opaque
stringData:
YATAI_ENDPOINT: {{ .Values.dynamo.yatai.endpoint | quote }}
YATAI_CLUSTER_NAME: {{ .Values.dynamo.yatai.clusterName | quote }}
YATAI_SYSTEM_NAMESPACE: {{ default .Release.Namespace .Values.dynamo.yataiSystem.namespace }}
YATAI_DEPLOYMENT_NAMESPACE: {{ .Release.Namespace }}
YATAI_IMAGE_BUILDER_NAMESPACE: {{ .Release.Namespace }}
YATAI_API_TOKEN: {{ include "dynamo-operator.yataiApiToken" . | quote }}
API_STORE_ENDPOINT : {{ .Values.dynamo.apiStore.endpoint | quote }}
API_STORE_CLUSTER_NAME: {{ .Values.dynamo.apiStore.clusterName | quote }}
DYNAMO_SYSTEM_NAMESPACE: {{ .Release.Namespace }}
DYNAMO_DEPLOYMENT_NAMESPACE: {{ .Release.Namespace }}
DYNAMO_IMAGE_BUILDER_NAMESPACE: {{ .Release.Namespace }}
INTERNAL_IMAGES_METRICS_TRANSFORMER: {{ .Values.dynamo.internalImages.metricsTransformer | quote }}
INTERNAL_IMAGES_DEBUGGER: {{ .Values.dynamo.internalImages.debugger | quote }}
INTERNAL_IMAGES_MONITOR_EXPORTER: {{ .Values.dynamo.internalImages.monitorExporter | quote }}
INTERNAL_IMAGES_PROXY: {{ .Values.dynamo.internalImages.proxy | quote }}
{{- if .Values.dynamo.disableAutomateBentoImageBuilder }}
DISABLE_AUTOMATE_BENTO_IMAGE_BUILDER: "true"
{{- end }}
{{- if .Values.dynamo.enableRestrictedSecurityContext }}
ENABLE_RESTRICTED_SECURITY_CONTEXT: "true"
{{- end }}
......@@ -53,17 +47,15 @@ stringData:
DOCKER_REGISTRY_PASSWORD: {{ .password | quote }}
{{- end }}
DOCKER_REGISTRY_SECURE: {{ .Values.dynamo.dockerRegistry.secure | quote }}
DOCKER_REGISTRY_BENTO_REPOSITORY_NAME: {{ .Values.dynamo.dockerRegistry.bentoRepositoryName | quote }}
DOCKER_REGISTRY_DYNAMO_COMPONENTS_REPOSITORY_NAME: {{ .Values.dynamo.dockerRegistry.dynamoComponentsRepositoryName | quote }}
INTERNAL_IMAGES_BENTO_DOWNLOADER: {{ .Values.dynamo.internalImages.bentoDownloader | quote }}
INTERNAL_IMAGES_DYNAMO_COMPONENTS_DOWNLOADER: {{ .Values.dynamo.internalImages.dynamoComponentsDownloader | quote }}
INTERNAL_IMAGES_KANIKO: {{ .Values.dynamo.internalImages.kaniko | quote }}
INTERNAL_IMAGES_BUILDKIT: {{ .Values.dynamo.internalImages.buildkit | quote }}
INTERNAL_IMAGES_BUILDKIT_ROOTLESS: {{ .Values.dynamo.internalImages.buildkitRootless | quote }}
BUILDKIT_URL: tcp://{{ include "dynamo-operator.fullname" . }}-buildkitd:1234
BENTO_IMAGE_BUILD_ENGINE: {{ .Values.dynamo.bentoImageBuildEngine | quote }}
DISABLE_YATAI_COMPONENT_REGISTRATION: {{ .Values.dynamo.disableYataiComponentRegistration | quote }}
DYNAMO_IMAGE_BUILD_ENGINE: {{ .Values.dynamo.imageBuildEngine | quote }}
ADD_NAMESPACE_PREFIX_TO_IMAGE_NAME: {{ .Values.dynamo.addNamespacePrefixToImageName | quote }}
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: v1
kind: Secret
metadata:
name: dynamo-deployment-shared-env
namespace: {{ .Release.Namespace }}
labels:
{{- include "dynamo-operator.labels" . | nindent 4 }}
type: Opaque
stringData:
BENTO_DEPLOYMENT_ALL_NAMESPACES: "false"
BENTO_DEPLOYMENT_NAMESPACES: {{ default .Release.Namespace .Values.dynamo.yataiSystem.namespace }}
YATAI_DEPLOYMENT_NAMESPACE: {{ default .Release.Namespace .Values.dynamo.yataiSystem.namespace }}
\ No newline at end of file
......@@ -73,27 +73,21 @@ controllerManager:
annotations: {}
dynamo:
yatai:
apiStore:
endpoint: http://dynamo-server.dynamo-system.svc.cluster.local
clusterName: default
yataiSystem:
# If left blank, will default to the installation namespace
namespace: ""
internalImages:
bentoDownloader: quay.io/bentoml/bento-downloader:0.0.5
kaniko: quay.io/bentoml/kaniko:debug
buildkit: moby/buildkit:latest
buildkitRootless: quay.io/bentoml/buildkit:master-rootless
dynamoComponentsDownloader: quay.io/bentoml/bento-downloader:0.0.5
kaniko: gcr.io/kaniko-project/executor:debug
buildkit: moby/buildkit:v0.20.2
buildkitRootless: moby/buildkit:v0.20.2-rootless
metricsTransformer: quay.io/bentoml/yatai-bento-metrics-transformer:0.0.4
debugger: quay.io/bentoml/bento-debugger:0.0.8
monitorExporter: quay.io/bentoml/bentoml-monitor-exporter:0.0.3
proxy: quay.io/bentoml/bentoml-proxy:0.0.1
disableAutomateBentoImageBuilder: false
enableRestrictedSecurityContext: false
disableYataiComponentRegistration: false
dockerRegistry:
server: 'nvcr.io/nvidian/nim-llm-dev'
......@@ -103,9 +97,9 @@ dynamo:
passwordExistingSecretName: ''
passwordExistingSecretKey: ''
secure: true
bentoRepositoryName: dynamo-pipelines
dynamoComponentsRepositoryName: dynamo-pipelines
bentoImageBuildEngine: buildkit # options: kaniko, buildkit, buildkit-rootless
imageBuildEngine: buildkit # options: kaniko, buildkit, buildkit-rootless
addNamespacePrefixToImageName: false
estargz:
......
......@@ -37,31 +37,27 @@ dynamo-operator:
- --health-probe-bind-address=:8081
- --metrics-bind-address=127.0.0.1:8080
dynamo:
yatai:
apiStore:
endpoint: http://dynamo-store
clusterName: default
yataiSystem:
namespace: ""
internalImages:
bentoDownloader: quay.io/bentoml/bento-downloader:0.0.5
kaniko: quay.io/bentoml/kaniko:debug
buildkit: quay.io/bentoml/buildkit:master
buildkitRootless: quay.io/bentoml/buildkit:master-rootless
dynamoComponentsDownloader: quay.io/bentoml/bento-downloader:0.0.5
kaniko: gcr.io/kaniko-project/executor:debug
buildkit: moby/buildkit:v0.20.2
buildkitRootless: moby/buildkit:v0.20.2-rootless
metricsTransformer: quay.io/bentoml/yatai-bento-metrics-transformer:0.0.4
debugger: quay.io/bentoml/bento-debugger:0.0.8
monitorExporter: quay.io/bentoml/bentoml-monitor-exporter:0.0.3
proxy: quay.io/bentoml/bentoml-proxy:0.0.1
disableAutomateBentoImageBuilder: false
enableRestrictedSecurityContext: false
disableYataiComponentRegistration: false
dockerRegistry:
server: ""
inClusterServer: ""
username: ""
password: ""
secure: true
bentoRepositoryName: dynamo-pipelines
bentoImageBuildEngine: buildkit
dynamoComponentsRepositoryName: dynamo-pipelines
imageBuildEngine: buildkit
addNamespacePrefixToImageName: false
estargz:
enabled: false
......
......@@ -17,7 +17,7 @@
* Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
*/
package yataiclient
package api_store_client
import (
"context"
......@@ -27,53 +27,27 @@ import (
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
)
const (
YataiApiTokenHeaderName = "X-YATAI-API-TOKEN"
NgcOrganizationHeaderName = "Nv-Ngc-Org"
NgcUserHeaderName = "Nv-Actor-Id"
)
type DynamoAuthHeaders struct {
OrgId string
UserId string
}
type YataiClient struct {
type ApiStoreClient struct {
endpoint string
apiToken string
headers DynamoAuthHeaders
}
func NewYataiClient(endpoint, apiToken string) *YataiClient {
return &YataiClient{
func NewApiStoreClient(endpoint string) *ApiStoreClient {
return &ApiStoreClient{
endpoint: endpoint,
apiToken: apiToken,
}
}
func (c *YataiClient) SetAuth(headers DynamoAuthHeaders) {
c.headers = headers
}
func (c *YataiClient) getHeaders() map[string]string {
return map[string]string{
YataiApiTokenHeaderName: c.apiToken,
NgcOrganizationHeaderName: c.headers.OrgId,
NgcUserHeaderName: c.headers.UserId,
}
}
func (c *YataiClient) GetBento(ctx context.Context, bentoRepositoryName, bentoVersion string) (bento *schemas.DynamoNIM, err error) {
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/bento_repositories/%s/bentos/%s", bentoRepositoryName, bentoVersion))
bento = &schemas.DynamoNIM{}
_, err = DoJsonRequest(ctx, "GET", url_, c.getHeaders(), nil, nil, bento, nil)
func (c *ApiStoreClient) GetDynamoComponent(ctx context.Context, name, version string) (component *schemas.DynamoComponent, err error) {
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s", name, version))
component = &schemas.DynamoComponent{}
_, err = DoJsonRequest(ctx, "GET", url_, nil, nil, nil, component, nil)
return
}
func (c *YataiClient) PresignBentoDownloadURL(ctx context.Context, bentoRepositoryName, bentoVersion string) (bento *schemas.DynamoNIM, err error) {
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/presign_download_url", bentoRepositoryName, bentoVersion))
bento = &schemas.DynamoNIM{}
_, err = DoJsonRequest(ctx, "PATCH", url_, c.getHeaders(), nil, nil, bento, nil)
func (c *ApiStoreClient) PresignDynamoComponentDownloadURL(ctx context.Context, name, version string) (component *schemas.DynamoComponent, err error) {
url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/presign_download_url", name, version))
component = &schemas.DynamoComponent{}
_, err = DoJsonRequest(ctx, "PATCH", url_, nil, nil, nil, component, nil)
return
}
......
......@@ -15,7 +15,7 @@
* limitations under the License.
*/
package yataiclient
package api_store_client
import (
"context"
......
......@@ -23,10 +23,10 @@ import (
"time"
)
type DynamoNIM struct {
PresignedDownloadUrl string `json:"presigned_download_url"`
TransmissionStrategy *TransmissionStrategy `json:"transmission_strategy"`
Manifest *DynamoNIMManifest `json:"manifest"`
type DynamoComponent struct {
PresignedDownloadUrl string `json:"presigned_download_url"`
TransmissionStrategy *TransmissionStrategy `json:"transmission_strategy"`
Manifest *DynamoComponentManifest `json:"manifest"`
}
type TransmissionStrategy string
......@@ -36,7 +36,7 @@ const (
TransmissionStrategyProxy TransmissionStrategy = "proxy"
)
type DynamoNIMManifest struct {
type DynamoComponentManifest struct {
BentomlVersion string `json:"bentoml_version"`
Models []string `json:"models"`
}
......@@ -77,10 +77,8 @@ const (
)
type DockerRegistrySchema struct {
BentosRepositoryURI string `json:"bentosRepositoryURI"`
ModelsRepositoryURI string `json:"modelsRepositoryURI"`
BentosRepositoryURIInCluster string `json:"bentosRepositoryURIInCluster"`
ModelsRepositoryURIInCluster string `json:"modelsRepositoryURIInCluster"`
DynamoRepositoryURI string `json:"dynamoRepositoryURI"`
DynamoRepositoryURIInCluster string `json:"dynamoRepositoryURIInCluster"`
Server string `json:"server"`
Username string `json:"username"`
Password string `json:"password"`
......
......@@ -179,7 +179,7 @@ func main() {
if err = (&controller.DynamoNimDeploymentReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("yatai-deployment"),
Recorder: mgr.GetEventRecorderFor("dynamo-deployment"),
Config: ctrlConfig,
NatsAddr: natsAddr,
EtcdAddr: etcdAddr,
......@@ -192,7 +192,7 @@ func main() {
if err = (&controller.DynamoNimRequestReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("yatai-image-builder"),
Recorder: mgr.GetEventRecorderFor("dynamo-image-builder"),
Config: ctrlConfig,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DynamoNimRequest")
......
......@@ -24,43 +24,41 @@ import (
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/consts"
)
func GetYataiImageBuilderNamespace(ctx context.Context) (namespace string, err error) {
return os.Getenv(consts.EnvYataiImageBuilderNamespace), nil
func GetDynamoImageBuilderNamespace(ctx context.Context) (namespace string, err error) {
return os.Getenv(consts.EnvDynamoImageBuilderNamespace), nil
}
type DockerRegistryConfig struct {
BentoRepositoryName string `yaml:"bento_repository_name"`
ModelRepositoryName string `yaml:"model_repository_name"`
Server string `yaml:"server"`
InClusterServer string `yaml:"in_cluster_server"`
Username string `yaml:"username"`
Password string `yaml:"password"`
Secure bool `yaml:"secure"`
DynamoComponentsRepositoryName string `yaml:"dynamo_components_repository_name"`
Server string `yaml:"server"`
InClusterServer string `yaml:"in_cluster_server"`
Username string `yaml:"username"`
Password string `yaml:"password"`
Secure bool `yaml:"secure"`
}
func GetDockerRegistryConfig() (conf *DockerRegistryConfig, err error) {
return &DockerRegistryConfig{
BentoRepositoryName: os.Getenv(consts.EnvDockerRegistryBentoRepositoryName),
ModelRepositoryName: os.Getenv(consts.EnvDockerRegistryModelRepositoryName),
Server: os.Getenv(consts.EnvDockerRegistryServer),
InClusterServer: os.Getenv(consts.EnvDockerRegistryInClusterServer),
Username: os.Getenv(consts.EnvDockerRegistryUsername),
Password: os.Getenv(consts.EnvDockerRegistryPassword),
Secure: os.Getenv(consts.EnvDockerRegistrySecure) == "true",
DynamoComponentsRepositoryName: os.Getenv(consts.EnvDockerRegistryDynamoComponentsRepositoryName),
Server: os.Getenv(consts.EnvDockerRegistryServer),
InClusterServer: os.Getenv(consts.EnvDockerRegistryInClusterServer),
Username: os.Getenv(consts.EnvDockerRegistryUsername),
Password: os.Getenv(consts.EnvDockerRegistryPassword),
Secure: os.Getenv(consts.EnvDockerRegistrySecure) == "true",
}, nil
}
type YataiConfig struct {
type ApiStoreConfig struct {
Endpoint string `yaml:"endpoint"`
ClusterName string `yaml:"cluster_name"`
ApiToken string `yaml:"api_token"`
}
func GetYataiConfig(ctx context.Context) (conf *YataiConfig, err error) {
return &YataiConfig{
Endpoint: os.Getenv(consts.EnvYataiEndpoint),
ClusterName: os.Getenv(consts.EnvYataiClusterName),
ApiToken: os.Getenv(consts.EnvYataiApiToken),
func GetApiStoreConfig(ctx context.Context) (conf *ApiStoreConfig, err error) {
return &ApiStoreConfig{
Endpoint: os.Getenv(consts.EnvApiStoreEndpoint),
ClusterName: os.Getenv(consts.EnvApiStoreClusterName),
ApiToken: os.Getenv(consts.EnvApiStoreApiToken),
}, nil
}
......@@ -72,16 +70,16 @@ func getEnv(key, fallback string) string {
}
type InternalImages struct {
BentoDownloader string
Kaniko string
MetricsTransformer string
Buildkit string
BuildkitRootless string
DynamoComponentsDownloader string
Kaniko string
MetricsTransformer string
Buildkit string
BuildkitRootless string
}
func GetInternalImages() (conf *InternalImages) {
conf = &InternalImages{}
conf.BentoDownloader = getEnv(consts.EnvInternalImagesBentoDownloader, consts.InternalImagesBentoDownloaderDefault)
conf.DynamoComponentsDownloader = getEnv(consts.EnvInternalImagesDynamoComponentsDownloader, consts.InternalImagesDynamoComponentsDownloaderDefault)
conf.Kaniko = getEnv(consts.EnvInternalImagesKaniko, consts.InternalImagesKanikoDefault)
conf.MetricsTransformer = getEnv(consts.EnvInternalImagesMetricsTransformer, consts.InternalImagesMetricsTransformerDefault)
conf.Buildkit = getEnv(consts.EnvInternalImagesBuildkit, consts.InternalImagesBuildkitDefault)
......
......@@ -3,96 +3,81 @@ package consts
const (
HPACPUDefaultAverageUtilization = 80
// nolint: gosec
YataiApiTokenHeaderName = "X-YATAI-API-TOKEN"
NgcOrganizationHeaderName = "Nv-Ngc-Org"
NgcUserHeaderName = "Nv-Actor-Id"
DefaultUserId = "default"
DefaultOrgId = "default"
BentoServicePort = 3000
BentoServicePortName = "http"
BentoContainerPortName = "http"
DynamoServicePort = 3000
DynamoServicePortName = "http"
DynamoContainerPortName = "http"
YataiImageBuilderComponentName = "yatai-image-builder"
YataiDeploymentComponentName = "yatai-deployment"
DynamoImageBuilderComponentName = "dynamo-image-builder"
YataiBentoDeploymentComponentApiServer = "api-server"
DynamoDeploymentComponentApiServer = "api-server"
InternalImagesBentoDownloaderDefault = "quay.io/bentoml/bento-downloader:0.0.3"
InternalImagesKanikoDefault = "quay.io/bentoml/kaniko:1.9.1"
InternalImagesMetricsTransformerDefault = "quay.io/bentoml/yatai-bento-metrics-transformer:0.0.3"
InternalImagesBuildkitDefault = "quay.io/bentoml/buildkit:master"
InternalImagesBuildkitRootlessDefault = "quay.io/bentoml/buildkit:master-rootless"
InternalImagesDynamoComponentsDownloaderDefault = "quay.io/bentoml/bento-downloader:0.0.3"
InternalImagesKanikoDefault = "gcr.io/kaniko-project/executor:debug"
InternalImagesMetricsTransformerDefault = "quay.io/bentoml/yatai-bento-metrics-transformer:0.0.3"
InternalImagesBuildkitDefault = "moby/buildkit:v0.20.2"
InternalImagesBuildkitRootlessDefault = "moby/buildkit:v0.20.2-rootless"
EnvYataiEndpoint = "YATAI_ENDPOINT"
EnvYataiClusterName = "YATAI_CLUSTER_NAME"
EnvApiStoreEndpoint = "API_STORE_ENDPOINT"
EnvApiStoreClusterName = "API_STORE_CLUSTER_NAME"
// nolint: gosec
EnvYataiApiToken = "YATAI_API_TOKEN"
EnvApiStoreApiToken = "API_STORE_API_TOKEN"
EnvBentoServicePort = "PORT"
EnvDynamoServicePort = "PORT"
// tracking envars
EnvYataiDeploymentUID = "YATAI_T_DEPLOYMENT_UID"
EnvDynamoDeploymentUID = "DYNAMO_DEPLOYMENT_UID"
EnvYataiBentoDeploymentName = "YATAI_BENTO_DEPLOYMENT_NAME"
EnvYataiBentoDeploymentNamespace = "YATAI_BENTO_DEPLOYMENT_NAMESPACE"
EnvDynamoDeploymentName = "DYNAMO_DEPLOYMENT_NAME"
EnvDynamoDeploymentNamespace = "DYNAMO_DEPLOYMENT_NAMESPACE"
EnvDockerRegistryServer = "DOCKER_REGISTRY_SERVER"
EnvDockerRegistryInClusterServer = "DOCKER_REGISTRY_IN_CLUSTER_SERVER"
EnvDockerRegistryUsername = "DOCKER_REGISTRY_USERNAME"
// nolint:gosec
EnvDockerRegistryPassword = "DOCKER_REGISTRY_PASSWORD"
EnvDockerRegistrySecure = "DOCKER_REGISTRY_SECURE"
EnvDockerRegistryBentoRepositoryName = "DOCKER_REGISTRY_BENTO_REPOSITORY_NAME"
EnvDockerRegistryModelRepositoryName = "DOCKER_REGISTRY_MODEL_REPOSITORY_NAME"
EnvInternalImagesBentoDownloader = "INTERNAL_IMAGES_BENTO_DOWNLOADER"
EnvInternalImagesKaniko = "INTERNAL_IMAGES_KANIKO"
EnvInternalImagesMetricsTransformer = "INTERNAL_IMAGES_METRICS_TRANSFORMER"
EnvInternalImagesBuildkit = "INTERNAL_IMAGES_BUILDKIT"
EnvInternalImagesBuildkitRootless = "INTERNAL_IMAGES_BUILDKIT_ROOTLESS"
EnvYataiSystemNamespace = "YATAI_SYSTEM_NAMESPACE"
EnvYataiImageBuilderNamespace = "YATAI_IMAGE_BUILDER_NAMESPACE"
EnvYataiDeploymentNamespace = "YATAI_DEPLOYMENT_NAMESPACE"
EnvBentoDeploymentNamespaces = "BENTO_DEPLOYMENT_NAMESPACES"
EnvImageBuildersNamespace = "IMAGE_BUILDERS_NAMESPACE"
KubeLabelYataiSelector = "yatai.ai/selector"
KubeLabelYataiBentoRepository = "yatai.ai/bento-repository"
KubeLabelYataiBento = "yatai.ai/bento"
KubeLabelYataiModelRepository = "yatai.ai/model-repository"
KubeLabelYataiModel = "yatai.ai/model"
KubeLabelYataiBentoDeployment = "yatai.ai/bento-deployment"
KubeLabelYataiBentoDeploymentComponentType = "yatai.ai/bento-deployment-component-type"
KubeLabelYataiBentoDeploymentTargetType = "yatai.ai/bento-deployment-target-type"
KubeLabelBentoRepository = "yatai.ai/bento-repository"
KubeLabelBentoVersion = "yatai.ai/bento-version"
KubeLabelCreator = "yatai.ai/creator"
KubeLabelIsBentoImageBuilder = "yatai.ai/is-bento-image-builder"
KubeLabelIsModelSeeder = "yatai.ai/is-model-seeder"
KubeLabelBentoRequest = "yatai.ai/bento-request"
EnvDockerRegistryPassword = "DOCKER_REGISTRY_PASSWORD"
EnvDockerRegistrySecure = "DOCKER_REGISTRY_SECURE"
EnvDockerRegistryDynamoComponentsRepositoryName = "DOCKER_REGISTRY_DYNAMO_COMPONENTS_REPOSITORY_NAME"
EnvInternalImagesDynamoComponentsDownloader = "INTERNAL_IMAGES_DYNAMO_COMPONENTS_DOWNLOADER"
EnvInternalImagesKaniko = "INTERNAL_IMAGES_KANIKO"
EnvInternalImagesMetricsTransformer = "INTERNAL_IMAGES_METRICS_TRANSFORMER"
EnvInternalImagesBuildkit = "INTERNAL_IMAGES_BUILDKIT"
EnvInternalImagesBuildkitRootless = "INTERNAL_IMAGES_BUILDKIT_ROOTLESS"
EnvDynamoSystemNamespace = "DYNAMO_SYSTEM_NAMESPACE"
EnvDynamoImageBuilderNamespace = "DYNAMO_IMAGE_BUILDER_NAMESPACE"
KubeLabelDynamoSelector = "nvidia.com/selector"
KubeLabelDynamoRepository = "nvidia.com/dynamo-repository"
KubeLabelDynamoVersion = "nvidia.com/dynamo-version"
KubeLabelDynamoDeployment = "nvidia.com/dynamo-deployment"
KubeLabelDynamoDeploymentComponentType = "nvidia.com/dynamo-deployment-component-type"
KubeLabelDynamoDeploymentTargetType = "nvidia.com/dynamo-deployment-target-type"
KubeLabelDynamoCreator = "nvidia.com/dynamo-creator"
KubeLabelIsDynamoImageBuilder = "nvidia.com/is-dynamo-image-builder"
KubeLabelDynamoRequest = "nvidia.com/dynamo-request"
KubeLabelValueFalse = "false"
KubeLabelValueTrue = "true"
KubeLabelYataiImageBuilderPod = "yatai.ai/yatai-image-builder-pod"
KubeLabelBentoDeploymentPod = "yatai.ai/bento-deployment-pod"
KubeLabelDynamoImageBuilderPod = "nvidia.com/dynamo-image-builder-pod"
KubeLabelDynamoDeploymentPod = "nvidia.com/dynamo-deployment-pod"
KubeAnnotationBentoRepository = "yatai.ai/bento-repository"
KubeAnnotationBentoVersion = "yatai.ai/bento-version"
KubeAnnotationDockerRegistryInsecure = "yatai.ai/docker-registry-insecure"
KubeAnnotationYataiImageBuilderSeparateModels = "yatai.ai/yatai-image-builder-separate-models"
KubeAnnotationIsMultiTenancy = "yatai.ai/is-multi-tenancy"
KubeAnnotationDynamoRepository = "nvidia.com/dynamo-repository"
KubeAnnotationDynamoVersion = "nvidia.com/dynamo-version"
KubeAnnotationDynamoDockerRegistryInsecure = "nvidia.com/docker-registry-insecure"
KubeResourceGPUNvidia = "nvidia.com/gpu"
// nolint: gosec
KubeSecretNameRegcred = "yatai-regcred"
KubeSecretNameRegcred = "dynamo-regcred"
KubeAnnotationDynamoNimRequestHash = "nvidia.com/dynamo-request-hash"
KubeAnnotationDynamoNimRequestImageBuiderHash = "nvidia.com/dynamo-request-image-builder-hash"
KubeAnnotationDynamoNimStorageNS = "nvidia.com/dynamo-storage-namespace"
)
......@@ -37,7 +37,7 @@ import (
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1"
commonController "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/controller_common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/nim"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/dynamo"
)
const (
......@@ -121,26 +121,26 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
return ctrl.Result{}, nil
}
// fetch the DynamoNIMConfig
dynamoNIMConfig, err := nim.GetDynamoNIMConfig(ctx, dynamoDeployment, r.Recorder)
// fetch the dynamoGraphConfig
dynamoGraphConfig, err := dynamo.GetDynamoGraphConfig(ctx, dynamoDeployment, r.Recorder)
if err != nil {
reason = "failed_to_get_the_DynamoNIMConfig"
reason = "failed_to_get_the_DynamoGraphConfig"
return ctrl.Result{}, err
}
// generate the DynamoNimDeployments from the config
dynamoNimDeployments, err := nim.GenerateDynamoNIMDeployments(ctx, dynamoDeployment, dynamoNIMConfig, r.generateDefaultIngressSpec(dynamoDeployment))
// generate the dynamoComponentsDeployments from the config
dynamoComponentsDeployments, err := dynamo.GenerateDynamoComponentsDeployments(ctx, dynamoDeployment, dynamoGraphConfig, r.generateDefaultIngressSpec(dynamoDeployment))
if err != nil {
reason = "failed_to_generate_the_DynamoNimDeployments"
reason = "failed_to_generate_the_DynamoComponentsDeployments"
return ctrl.Result{}, err
}
// merge the DynamoNimDeployments with the DynamoNimDeployments from the CRD
for serviceName, deployment := range dynamoNimDeployments {
// merge the dynamoComponentsDeployments with the dynamoComponentsDeployments from the CRD
for serviceName, deployment := range dynamoComponentsDeployments {
if _, ok := dynamoDeployment.Spec.Services[serviceName]; ok {
err := mergo.Merge(&deployment.Spec.DynamoNimDeploymentSharedSpec, dynamoDeployment.Spec.Services[serviceName].DynamoNimDeploymentSharedSpec, mergo.WithOverride)
if err != nil {
reason = "failed_to_merge_the_DynamoNimDeployments"
reason = "failed_to_merge_the_DynamoComponentsDeployments"
return ctrl.Result{}, err
}
}
......@@ -149,8 +149,8 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
}
// Set common env vars on each of the dynamoNimDeployments
for _, deployment := range dynamoNimDeployments {
// Set common env vars on each of the dynamoComponentsDeployments
for _, deployment := range dynamoComponentsDeployments {
if len(dynamoDeployment.Spec.Envs) > 0 {
deployment.Spec.Envs = mergeEnvs(dynamoDeployment.Spec.Envs, deployment.Spec.Envs)
}
......@@ -177,20 +177,20 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
notReadyDeployments := []string{}
// reconcile the DynamoNimDeployments
for serviceName, dynamoNimDeployment := range dynamoNimDeployments {
logger.Info("Reconciling the DynamoNimDeployment", "serviceName", serviceName, "dynamoNimDeployment", dynamoNimDeployment)
if err := ctrl.SetControllerReference(dynamoDeployment, dynamoNimDeployment, r.Scheme); err != nil {
reason = "failed_to_set_the_controller_reference_for_the_DynamoNimDeployment"
// reconcile the dynamoComponentsDeployments
for serviceName, dynamoComponentDeployment := range dynamoComponentsDeployments {
logger.Info("Reconciling the DynamoNimDeployment", "serviceName", serviceName, "dynamoComponentDeployment", dynamoComponentDeployment)
if err := ctrl.SetControllerReference(dynamoDeployment, dynamoComponentDeployment, r.Scheme); err != nil {
reason = "failed_to_set_the_controller_reference_for_the_DynamoComponentDeployment"
return ctrl.Result{}, err
}
dynamoNimDeployment, err = commonController.SyncResource(ctx, r.Client, dynamoNimDeployment, types.NamespacedName{Name: dynamoNimDeployment.Name, Namespace: dynamoNimDeployment.Namespace}, false)
dynamoComponentDeployment, err = commonController.SyncResource(ctx, r.Client, dynamoComponentDeployment, types.NamespacedName{Name: dynamoComponentDeployment.Name, Namespace: dynamoComponentDeployment.Namespace}, false)
if err != nil {
reason = "failed_to_sync_the_DynamoNimDeployment"
return ctrl.Result{}, err
}
if !dynamoNimDeployment.Status.IsReady() {
notReadyDeployments = append(notReadyDeployments, dynamoNimDeployment.Name)
if !dynamoComponentDeployment.Status.IsReady() {
notReadyDeployments = append(notReadyDeployments, dynamoComponentDeployment.Name)
}
}
if len(notReadyDeployments) == 0 {
......
......@@ -69,26 +69,26 @@ import (
)
const (
DefaultClusterName = "default"
DefaultServiceAccountName = "default"
KubeValueNameSharedMemory = "shared-memory"
KubeAnnotationDeploymentStrategy = "yatai.ai/deployment-strategy"
KubeAnnotationYataiEnableStealingTrafficDebugMode = "yatai.ai/enable-stealing-traffic-debug-mode"
KubeAnnotationYataiEnableDebugMode = "yatai.ai/enable-debug-mode"
KubeAnnotationYataiEnableDebugPodReceiveProductionTraffic = "yatai.ai/enable-debug-pod-receive-production-traffic"
KubeAnnotationYataiProxySidecarResourcesLimitsCPU = "yatai.ai/proxy-sidecar-resources-limits-cpu"
KubeAnnotationYataiProxySidecarResourcesLimitsMemory = "yatai.ai/proxy-sidecar-resources-limits-memory"
KubeAnnotationYataiProxySidecarResourcesRequestsCPU = "yatai.ai/proxy-sidecar-resources-requests-cpu"
KubeAnnotationYataiProxySidecarResourcesRequestsMemory = "yatai.ai/proxy-sidecar-resources-requests-memory"
DeploymentTargetTypeProduction = "production"
DeploymentTargetTypeDebug = "debug"
ContainerPortNameHTTPProxy = "http-proxy"
ServicePortNameHTTPNonProxy = "http-non-proxy"
HeaderNameDebug = "X-Yatai-Debug"
DefaultIngressSuffix = "local"
DefaultClusterName = "default"
DefaultServiceAccountName = "default"
KubeValueNameSharedMemory = "shared-memory"
KubeAnnotationDeploymentStrategy = "nvidia.com/deployment-strategy"
KubeAnnotationEnableStealingTrafficDebugMode = "nvidia.com/enable-stealing-traffic-debug-mode"
KubeAnnotationEnableDebugMode = "nvidia.com/enable-debug-mode"
KubeAnnotationEnableDebugPodReceiveProductionTraffic = "nvidia.com/enable-debug-pod-receive-production-traffic"
KubeAnnotationProxySidecarResourcesLimitsCPU = "nvidia.com/proxy-sidecar-resources-limits-cpu"
KubeAnnotationProxySidecarResourcesLimitsMemory = "nvidia.com/proxy-sidecar-resources-limits-memory"
KubeAnnotationProxySidecarResourcesRequestsCPU = "nvidia.com/proxy-sidecar-resources-requests-cpu"
KubeAnnotationProxySidecarResourcesRequestsMemory = "nvidia.com/proxy-sidecar-resources-requests-memory"
DeploymentTargetTypeProduction = "production"
DeploymentTargetTypeDebug = "debug"
ContainerPortNameHTTPProxy = "http-proxy"
ServicePortNameHTTPNonProxy = "http-non-proxy"
HeaderNameDebug = "X-Nvidia-Debug"
DefaultIngressSuffix = "local"
)
var ServicePortHTTPNonProxy = commonconsts.BentoServicePort + 1
var ServicePortHTTPNonProxy = commonconsts.DynamoServicePort + 1
// DynamoNimDeploymentReconciler reconciles a DynamoNimDeployment object
type DynamoNimDeploymentReconciler struct {
......@@ -394,7 +394,7 @@ func (r *DynamoNimDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.
}
if !modified {
r.Recorder.Eventf(dynamoNimDeployment, corev1.EventTypeNormal, "UpdateYataiDeployment", "No changes to yatai deployment %s", dynamoNimDeployment.Name)
r.Recorder.Eventf(dynamoNimDeployment, corev1.EventTypeNormal, "UpdateDynamoDeployment", "No changes to dynamo deployment %s", dynamoNimDeployment.Name)
}
logs.Info("Finished reconciling.")
......@@ -723,7 +723,7 @@ func checkIfIsDebugModeEnabled(annotations map[string]string) bool {
return false
}
return annotations[KubeAnnotationYataiEnableDebugMode] == commonconsts.KubeLabelValueTrue
return annotations[KubeAnnotationEnableDebugMode] == commonconsts.KubeLabelValueTrue
}
func checkIfIsStealingTrafficDebugModeEnabled(annotations map[string]string) bool {
......@@ -731,7 +731,7 @@ func checkIfIsStealingTrafficDebugModeEnabled(annotations map[string]string) boo
return false
}
return annotations[KubeAnnotationYataiEnableStealingTrafficDebugMode] == commonconsts.KubeLabelValueTrue
return annotations[KubeAnnotationEnableStealingTrafficDebugMode] == commonconsts.KubeLabelValueTrue
}
func checkIfIsDebugPodReceiveProductionTrafficEnabled(annotations map[string]string) bool {
......@@ -739,7 +739,7 @@ func checkIfIsDebugPodReceiveProductionTrafficEnabled(annotations map[string]str
return false
}
return annotations[KubeAnnotationYataiEnableDebugPodReceiveProductionTraffic] == commonconsts.KubeLabelValueTrue
return annotations[KubeAnnotationEnableDebugPodReceiveProductionTraffic] == commonconsts.KubeLabelValueTrue
}
func checkIfContainsStealingTrafficDebugModeEnabled(dynamoNimDeployment *v1alpha1.DynamoNimDeployment) bool {
......@@ -933,21 +933,21 @@ func (r *DynamoNimDeploymentReconciler) getGenericServiceName(dynamoNimDeploymen
func (r *DynamoNimDeploymentReconciler) getKubeLabels(dynamoNimDeployment *v1alpha1.DynamoNimDeployment, dynamoNim *v1alpha1.DynamoNim) map[string]string {
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoNim.Spec.Tag, ":")
labels := map[string]string{
commonconsts.KubeLabelYataiBentoDeployment: dynamoNimDeployment.Name,
commonconsts.KubeLabelBentoRepository: dynamoNimRepositoryName,
commonconsts.KubeLabelBentoVersion: dynamoNimVersion,
commonconsts.KubeLabelYataiBentoDeploymentTargetType: DeploymentTargetTypeProduction,
commonconsts.KubeLabelCreator: "yatai-deployment",
commonconsts.KubeLabelDynamoDeployment: dynamoNimDeployment.Name,
commonconsts.KubeLabelDynamoRepository: dynamoNimRepositoryName,
commonconsts.KubeLabelDynamoVersion: dynamoNimVersion,
commonconsts.KubeLabelDynamoDeploymentTargetType: DeploymentTargetTypeProduction,
commonconsts.KubeLabelDynamoCreator: "dynamo-deployment",
}
labels[commonconsts.KubeLabelYataiBentoDeploymentComponentType] = commonconsts.YataiBentoDeploymentComponentApiServer
labels[commonconsts.KubeLabelDynamoDeploymentComponentType] = commonconsts.DynamoDeploymentComponentApiServer
return labels
}
func (r *DynamoNimDeploymentReconciler) getKubeAnnotations(dynamoNimDeployment *v1alpha1.DynamoNimDeployment, dynamoNim *v1alpha1.DynamoNim) map[string]string {
dynamoNimRepositoryName, dynamoNimVersion := getDynamoNimRepositoryNameAndDynamoNimVersion(dynamoNim)
annotations := map[string]string{
commonconsts.KubeAnnotationBentoRepository: dynamoNimRepositoryName,
commonconsts.KubeAnnotationBentoVersion: dynamoNimVersion,
commonconsts.KubeAnnotationDynamoRepository: dynamoNimRepositoryName,
commonconsts.KubeAnnotationDynamoVersion: dynamoNimVersion,
}
var extraAnnotations map[string]string
if dynamoNimDeployment.Spec.ExtraPodMetadata != nil {
......@@ -1048,7 +1048,7 @@ func (r *DynamoNimDeploymentReconciler) generateDeployment(ctx context.Context,
Replicas: replicas,
Selector: &metav1.LabelSelector{
MatchLabels: map[string]string{
commonconsts.KubeLabelYataiSelector: kubeName,
commonconsts.KubeLabelDynamoSelector: kubeName,
},
},
Template: *podTemplateSpec,
......@@ -1135,21 +1135,17 @@ func (r *DynamoNimDeploymentReconciler) generatePodTemplateSpec(ctx context.Cont
dynamoNimRepositoryName, _ := getDynamoNimRepositoryNameAndDynamoNimVersion(opt.dynamoNim)
podLabels := r.getKubeLabels(opt.dynamoNimDeployment, opt.dynamoNim)
if opt.isStealingTrafficDebugModeEnabled {
podLabels[commonconsts.KubeLabelYataiBentoDeploymentTargetType] = DeploymentTargetTypeDebug
podLabels[commonconsts.KubeLabelDynamoDeploymentTargetType] = DeploymentTargetTypeDebug
}
podAnnotations := r.getKubeAnnotations(opt.dynamoNimDeployment, opt.dynamoNim)
kubeName := r.getKubeName(opt.dynamoNimDeployment, opt.dynamoNim, opt.isStealingTrafficDebugModeEnabled)
containerPort := commonconsts.BentoServicePort
containerPort := commonconsts.DynamoServicePort
lastPort := containerPort + 1
monitorExporter := opt.dynamoNimDeployment.Spec.MonitorExporter
needMonitorContainer := monitorExporter != nil && monitorExporter.Enabled
lastPort++
monitorExporterPort := lastPort
var envs []corev1.EnvVar
envsSeen := make(map[string]struct{})
......@@ -1170,7 +1166,7 @@ func (r *DynamoNimDeploymentReconciler) generatePodTemplateSpec(ctx context.Cont
if _, ok := envsSeen[env.Name]; ok {
continue
}
if env.Name == commonconsts.EnvBentoServicePort {
if env.Name == commonconsts.EnvDynamoServicePort {
// nolint: gosec
containerPort, err = strconv.Atoi(env.Value)
if err != nil {
......@@ -1187,19 +1183,19 @@ func (r *DynamoNimDeploymentReconciler) generatePodTemplateSpec(ctx context.Cont
defaultEnvs := []corev1.EnvVar{
{
Name: commonconsts.EnvBentoServicePort,
Name: commonconsts.EnvDynamoServicePort,
Value: fmt.Sprintf("%d", containerPort),
},
{
Name: commonconsts.EnvYataiDeploymentUID,
Name: commonconsts.EnvDynamoDeploymentUID,
Value: string(opt.dynamoNimDeployment.UID),
},
{
Name: commonconsts.EnvYataiBentoDeploymentName,
Name: commonconsts.EnvDynamoDeploymentName,
Value: opt.dynamoNimDeployment.Name,
},
{
Name: commonconsts.EnvYataiBentoDeploymentNamespace,
Name: commonconsts.EnvDynamoDeploymentNamespace,
Value: opt.dynamoNimDeployment.Namespace,
},
}
......@@ -1224,44 +1220,6 @@ func (r *DynamoNimDeploymentReconciler) generatePodTemplateSpec(ctx context.Cont
}
}
if needMonitorContainer {
monitoringConfigTemplate := `monitoring.enabled=true
monitoring.type=otlp
monitoring.options.endpoint=http://127.0.0.1:%d
monitoring.options.insecure=true`
var bentomlOptions string
index := -1
for i, env := range envs {
if env.Name == "BENTOML_CONFIG_OPTIONS" {
bentomlOptions = env.Value
index = i
break
}
}
if index == -1 {
// BENOML_CONFIG_OPTIONS not defined
bentomlOptions = fmt.Sprintf(monitoringConfigTemplate, monitorExporterPort)
envs = append(envs, corev1.EnvVar{
Name: "BENTOML_CONFIG_OPTIONS",
Value: bentomlOptions,
})
} else if !strings.Contains(bentomlOptions, "monitoring") {
// monitoring config not defined
envs = append(envs[:index], envs[index+1:]...)
bentomlOptions = strings.TrimSpace(bentomlOptions) // ' ' -> ''
if bentomlOptions != "" {
bentomlOptions += "\n"
}
bentomlOptions += fmt.Sprintf(monitoringConfigTemplate, monitorExporterPort)
envs = append(envs, corev1.EnvVar{
Name: "BENTOML_CONFIG_OPTIONS",
Value: bentomlOptions,
})
}
// monitoring config already defined
// do nothing
}
var livenessProbe *corev1.Probe
if opt.dynamoNimDeployment.Spec.LivenessProbe != nil {
livenessProbe = opt.dynamoNimDeployment.Spec.LivenessProbe
......@@ -1322,9 +1280,9 @@ monitoring.options.insecure=true`
}
}
yataiResources := opt.dynamoNimDeployment.Spec.Resources
dynamoResources := opt.dynamoNimDeployment.Spec.Resources
resources, err := getResourcesConfig(yataiResources)
resources, err := getResourcesConfig(dynamoResources)
if err != nil {
err = errors.Wrap(err, "failed to get resources config")
return nil, err
......@@ -1405,7 +1363,7 @@ monitoring.options.insecure=true`
Ports: []corev1.ContainerPort{
{
Protocol: corev1.ProtocolTCP,
Name: commonconsts.BentoContainerPortName,
Name: commonconsts.DynamoContainerPortName,
ContainerPort: int32(containerPort), // nolint: gosec
},
},
......@@ -1424,14 +1382,14 @@ monitoring.options.insecure=true`
}
}
if resourceAnnotations["yatai.ai/enable-container-privileged"] == commonconsts.KubeLabelValueTrue {
if resourceAnnotations["nvidia.com/enable-container-privileged"] == commonconsts.KubeLabelValueTrue {
if container.SecurityContext == nil {
container.SecurityContext = &corev1.SecurityContext{}
}
container.SecurityContext.Privileged = &[]bool{true}[0]
}
if resourceAnnotations["yatai.ai/enable-container-ptrace"] == commonconsts.KubeLabelValueTrue {
if resourceAnnotations["nvidia.com/enable-container-ptrace"] == commonconsts.KubeLabelValueTrue {
if container.SecurityContext == nil {
container.SecurityContext = &corev1.SecurityContext{}
}
......@@ -1440,7 +1398,7 @@ monitoring.options.insecure=true`
}
}
if resourceAnnotations["yatai.ai/run-container-as-root"] == commonconsts.KubeLabelValueTrue {
if resourceAnnotations["nvidia.com/run-container-as-root"] == commonconsts.KubeLabelValueTrue {
if container.SecurityContext == nil {
container.SecurityContext = &corev1.SecurityContext{}
}
......@@ -1522,7 +1480,7 @@ monitoring.options.insecure=true`
lastPort++
proxyPort := lastPort
proxyResourcesRequestsCPUStr := resourceAnnotations[KubeAnnotationYataiProxySidecarResourcesRequestsCPU]
proxyResourcesRequestsCPUStr := resourceAnnotations[KubeAnnotationProxySidecarResourcesRequestsCPU]
if proxyResourcesRequestsCPUStr == "" {
proxyResourcesRequestsCPUStr = "100m"
}
......@@ -1532,7 +1490,7 @@ monitoring.options.insecure=true`
err = errors.Wrapf(err, "failed to parse proxy sidecar resources requests cpu: %s", proxyResourcesRequestsCPUStr)
return nil, err
}
proxyResourcesRequestsMemoryStr := resourceAnnotations[KubeAnnotationYataiProxySidecarResourcesRequestsMemory]
proxyResourcesRequestsMemoryStr := resourceAnnotations[KubeAnnotationProxySidecarResourcesRequestsMemory]
if proxyResourcesRequestsMemoryStr == "" {
proxyResourcesRequestsMemoryStr = "200Mi"
}
......@@ -1542,7 +1500,7 @@ monitoring.options.insecure=true`
err = errors.Wrapf(err, "failed to parse proxy sidecar resources requests memory: %s", proxyResourcesRequestsMemoryStr)
return nil, err
}
proxyResourcesLimitsCPUStr := resourceAnnotations[KubeAnnotationYataiProxySidecarResourcesLimitsCPU]
proxyResourcesLimitsCPUStr := resourceAnnotations[KubeAnnotationProxySidecarResourcesLimitsCPU]
if proxyResourcesLimitsCPUStr == "" {
proxyResourcesLimitsCPUStr = "300m"
}
......@@ -1552,7 +1510,7 @@ monitoring.options.insecure=true`
err = errors.Wrapf(err, "failed to parse proxy sidecar resources limits cpu: %s", proxyResourcesLimitsCPUStr)
return nil, err
}
proxyResourcesLimitsMemoryStr := resourceAnnotations[KubeAnnotationYataiProxySidecarResourcesLimitsMemory]
proxyResourcesLimitsMemoryStr := resourceAnnotations[KubeAnnotationProxySidecarResourcesLimitsMemory]
if proxyResourcesLimitsMemoryStr == "" {
proxyResourcesLimitsMemoryStr = "1000Mi"
}
......@@ -1690,106 +1648,6 @@ monitoring.options.insecure=true`
SecurityContext: securityContext,
})
if needMonitorContainer {
lastPort++
monitorExporterProbePort := lastPort
monitorExporterImage := "quay.io/bentoml/bentoml-monitor-exporter:0.0.3"
monitorExporterImage_ := os.Getenv("INTERNAL_IMAGES_MONITOR_EXPORTER")
if monitorExporterImage_ != "" {
monitorExporterImage = monitorExporterImage_
}
monitorOptEnvs := make([]corev1.EnvVar, 0, len(monitorExporter.Options)+len(monitorExporter.StructureOptions))
monitorOptEnvsSeen := make(map[string]struct{})
for _, env := range monitorExporter.StructureOptions {
monitorOptEnvsSeen[strings.ToLower(env.Name)] = struct{}{}
monitorOptEnvs = append(monitorOptEnvs, corev1.EnvVar{
Name: "FLUENTBIT_OUTPUT_OPTION_" + strings.ToUpper(env.Name),
Value: env.Value,
ValueFrom: env.ValueFrom,
})
}
for k, v := range monitorExporter.Options {
if _, exists := monitorOptEnvsSeen[strings.ToLower(k)]; exists {
continue
}
monitorOptEnvs = append(monitorOptEnvs, corev1.EnvVar{
Name: "FLUENTBIT_OUTPUT_OPTION_" + strings.ToUpper(k),
Value: v,
})
}
monitorVolumeMounts := make([]corev1.VolumeMount, 0, len(monitorExporter.Mounts))
for idx, mount := range monitorExporter.Mounts {
volumeName := fmt.Sprintf("monitor-exporter-%d", idx)
volumes = append(volumes, corev1.Volume{
Name: volumeName,
VolumeSource: mount.VolumeSource,
})
monitorVolumeMounts = append(monitorVolumeMounts, corev1.VolumeMount{
Name: volumeName,
MountPath: mount.Path,
ReadOnly: mount.ReadOnly,
})
}
containers = append(containers, corev1.Container{
Name: "monitor-exporter",
Image: monitorExporterImage,
VolumeMounts: monitorVolumeMounts,
Env: append([]corev1.EnvVar{
{
Name: "FLUENTBIT_OTLP_PORT",
Value: fmt.Sprint(monitorExporterPort),
},
{
Name: "FLUENTBIT_HTTP_PORT",
Value: fmt.Sprint(monitorExporterProbePort),
},
{
Name: "FLUENTBIT_OUTPUT",
Value: monitorExporter.Output,
},
}, monitorOptEnvs...),
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("24Mi"),
},
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1000m"),
corev1.ResourceMemory: resource.MustParse("72Mi"),
},
},
ReadinessProbe: &corev1.Probe{
InitialDelaySeconds: 5,
TimeoutSeconds: 5,
FailureThreshold: 10,
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/readyz",
Port: intstr.FromInt(monitorExporterProbePort),
},
},
},
LivenessProbe: &corev1.Probe{
InitialDelaySeconds: 5,
TimeoutSeconds: 5,
FailureThreshold: 10,
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/livez",
Port: intstr.FromInt(monitorExporterProbePort),
},
},
},
SecurityContext: securityContext,
})
}
debuggerImage := "quay.io/bentoml/bento-debugger:0.0.8"
debuggerImage_ := os.Getenv("INTERNAL_IMAGES_DEBUGGER")
if debuggerImage_ != "" {
......@@ -1824,7 +1682,7 @@ monitoring.options.insecure=true`
})
}
podLabels[commonconsts.KubeLabelYataiSelector] = kubeName
podLabels[commonconsts.KubeLabelDynamoSelector] = kubeName
podSpec := corev1.PodSpec{
Containers: containers,
......@@ -1860,7 +1718,7 @@ monitoring.options.insecure=true`
if podSpec.ServiceAccountName == "" {
serviceAccounts := &corev1.ServiceAccountList{}
err = r.List(ctx, serviceAccounts, client.InNamespace(opt.dynamoNimDeployment.Namespace), client.MatchingLabels{
commonconsts.KubeLabelBentoDeploymentPod: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoDeploymentPod: commonconsts.KubeLabelValueTrue,
})
if err != nil {
err = errors.Wrapf(err, "failed to list service accounts in namespace %s", opt.dynamoNimDeployment.Namespace)
......@@ -1873,15 +1731,15 @@ monitoring.options.insecure=true`
}
}
if resourceAnnotations["yatai.ai/enable-host-ipc"] == commonconsts.KubeLabelValueTrue {
if resourceAnnotations["nvidia.com/enable-host-ipc"] == commonconsts.KubeLabelValueTrue {
podSpec.HostIPC = true
}
if resourceAnnotations["yatai.ai/enable-host-network"] == commonconsts.KubeLabelValueTrue {
if resourceAnnotations["nvidia.com/enable-host-network"] == commonconsts.KubeLabelValueTrue {
podSpec.HostNetwork = true
}
if resourceAnnotations["yatai.ai/enable-host-pid"] == commonconsts.KubeLabelValueTrue {
if resourceAnnotations["nvidia.com/enable-host-pid"] == commonconsts.KubeLabelValueTrue {
podSpec.HostPID = true
}
......@@ -2025,12 +1883,12 @@ func (r *DynamoNimDeploymentReconciler) generateService(ctx context.Context, opt
}
if opt.isStealingTrafficDebugModeEnabled {
selector[commonconsts.KubeLabelYataiBentoDeploymentTargetType] = DeploymentTargetTypeDebug
selector[commonconsts.KubeLabelDynamoDeploymentTargetType] = DeploymentTargetTypeDebug
}
targetPort := intstr.FromString(commonconsts.BentoContainerPortName)
targetPort := intstr.FromString(commonconsts.DynamoContainerPortName)
if opt.isGenericService {
delete(selector, commonconsts.KubeLabelYataiBentoDeploymentTargetType)
delete(selector, commonconsts.KubeLabelDynamoDeploymentTargetType)
if opt.containsStealingTrafficDebugModeEnabled {
targetPort = intstr.FromString(ContainerPortNameHTTPProxy)
}
......@@ -2040,15 +1898,15 @@ func (r *DynamoNimDeploymentReconciler) generateService(ctx context.Context, opt
Selector: selector,
Ports: []corev1.ServicePort{
{
Name: commonconsts.BentoServicePortName,
Port: commonconsts.BentoServicePort,
Name: commonconsts.DynamoServicePortName,
Port: commonconsts.DynamoServicePort,
TargetPort: targetPort,
Protocol: corev1.ProtocolTCP,
},
{
Name: ServicePortNameHTTPNonProxy,
Port: int32(ServicePortHTTPNonProxy),
TargetPort: intstr.FromString(commonconsts.BentoContainerPortName),
TargetPort: intstr.FromString(commonconsts.DynamoContainerPortName),
Protocol: corev1.ProtocolTCP,
},
},
......
......@@ -37,6 +37,7 @@ import (
"emperror.dev/errors"
commonconfig "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/config"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/consts"
commonconsts "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/consts"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/controller_common"
"github.com/apparentlymart/go-shquot/shquot"
......@@ -64,23 +65,12 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
apiStoreClient "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/api_store_client"
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
yataiclient "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/yatai-client"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1"
)
const (
KubeAnnotationDynamoNimRequestHash = "yatai.ai/bento-request-hash"
KubeAnnotationDynamoNimRequestImageBuiderHash = "yatai.ai/bento-request-image-builder-hash"
KubeAnnotationDynamoNimRequestModelSeederHash = "yatai.ai/bento-request-model-seeder-hash"
KubeLabelYataiImageBuilderSeparateModels = "yatai.ai/yatai-image-builder-separate-models"
KubeAnnotationDynamoNimStorageNS = "yatai.ai/bento-storage-namespace"
KubeAnnotationModelStorageNS = "yatai.ai/model-storage-namespace"
StoreSchemaAWS = "aws"
StoreSchemaGCP = "gcp"
)
// DynamoNimRequestReconciler reconciles a DynamoNimRequest object
type DynamoNimRequestReconciler struct {
client.Client
......@@ -204,12 +194,6 @@ func (r *DynamoNimRequestReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
}
if isSeparateModels(dynamoNimRequest) {
err = errors.New("separate models, unsupported feature")
logs.Error(err, "unsupported feature")
return
}
dynamoNimRequest, imageInfo, imageExists, imageExistsResult, err := r.ensureImageExists(ctx, ensureImageExistsOption{
dynamoNimRequest: dynamoNimRequest,
req: req,
......@@ -265,14 +249,14 @@ func (r *DynamoNimRequestReconciler) Reconcile(ctx context.Context, req ctrl.Req
}
if dynamoNimRequest.Spec.DownloadURL == "" {
var dynamoNim *schemas.DynamoNIM
dynamoNim, err = r.getDynamoNim(ctx, dynamoNimRequest)
var dynamoComponent *schemas.DynamoComponent
dynamoComponent, err = r.getDynamoComponent(ctx, dynamoNimRequest)
if err != nil {
err = errors.Wrap(err, "get dynamoNim")
err = errors.Wrap(err, "get dynamo component")
return
}
dynamoNimCR.Spec.Context = &nvidiacomv1alpha1.BentoContext{
BentomlVersion: dynamoNim.Manifest.BentomlVersion,
BentomlVersion: dynamoComponent.Manifest.BentomlVersion,
}
}
......@@ -425,14 +409,8 @@ func (r *DynamoNimRequestReconciler) ensureImageExists(ctx context.Context, opt
}
jobLabels := map[string]string{
commonconsts.KubeLabelBentoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelIsBentoImageBuilder: commonconsts.KubeLabelValueTrue,
}
if isSeparateModels(opt.dynamoNimRequest) {
jobLabels[KubeLabelYataiImageBuilderSeparateModels] = commonconsts.KubeLabelValueTrue
} else {
jobLabels[KubeLabelYataiImageBuilderSeparateModels] = commonconsts.KubeLabelValueFalse
commonconsts.KubeLabelDynamoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelIsDynamoImageBuilder: commonconsts.KubeLabelValueTrue,
}
jobs := &batchv1.JobList{}
......@@ -446,7 +424,7 @@ func (r *DynamoNimRequestReconciler) ensureImageExists(ctx context.Context, opt
for _, job_ := range jobs.Items {
job_ := job_
oldHash := job_.Annotations[KubeAnnotationDynamoNimRequestHash]
oldHash := job_.Annotations[consts.KubeAnnotationDynamoNimRequestHash]
if oldHash != dynamoNimRequestHashStr {
logs.Info("Because hash changed, delete old job", "job", job_.Name, "oldHash", oldHash, "newHash", dynamoNimRequestHashStr)
// --cascade=foreground
......@@ -660,11 +638,11 @@ const (
)
const (
EnvDynamoNimImageBuildEngine = "BENTO_IMAGE_BUILD_ENGINE"
EnvDynamoImageBuildEngine = "DYNAMO_IMAGE_BUILD_ENGINE"
)
func getDynamoNimImageBuildEngine() DynamoNimImageBuildEngine {
engine := os.Getenv(EnvDynamoNimImageBuildEngine)
engine := os.Getenv(EnvDynamoImageBuildEngine)
if engine == "" {
return DynamoNimImageBuildEngineKaniko
}
......@@ -750,56 +728,29 @@ func (r *DynamoNimRequestReconciler) makeSureDockerConfigJSONSecret(ctx context.
}
//nolint:nakedret
func (r *DynamoNimRequestReconciler) getYataiClient(ctx context.Context) (yataiClient **yataiclient.YataiClient, yataiConf **commonconfig.YataiConfig, err error) {
yataiConf_, err := commonconfig.GetYataiConfig(ctx)
func (r *DynamoNimRequestReconciler) getApiStoreClient(ctx context.Context) (*apiStoreClient.ApiStoreClient, *commonconfig.ApiStoreConfig, error) {
apiStoreConf, err := commonconfig.GetApiStoreConfig(ctx)
isNotFound := k8serrors.IsNotFound(err)
if err != nil && !isNotFound {
err = errors.Wrap(err, "get yatai config")
return
err = errors.Wrap(err, "get api store config")
return nil, nil, err
}
if isNotFound {
return
}
if yataiConf_.Endpoint == "" {
return
}
if yataiConf_.ClusterName == "" {
yataiConf_.ClusterName = "default"
}
yataiClient_ := yataiclient.NewYataiClient(yataiConf_.Endpoint, fmt.Sprintf("%s:%s:%s", commonconsts.YataiImageBuilderComponentName, yataiConf_.ClusterName, yataiConf_.ApiToken))
yataiClient = &yataiClient_
yataiConf = &yataiConf_
return
}
func (r *DynamoNimRequestReconciler) getYataiClientWithAuth(ctx context.Context, dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest) (**yataiclient.YataiClient, **commonconfig.YataiConfig, error) {
orgId, ok := dynamoNimRequest.Labels[commonconsts.NgcOrganizationHeaderName]
if !ok {
orgId = commonconsts.DefaultOrgId
return nil, nil, err
}
userId, ok := dynamoNimRequest.Labels[commonconsts.NgcUserHeaderName]
if !ok {
userId = commonconsts.DefaultUserId
if apiStoreConf.Endpoint == "" {
return nil, nil, err
}
auth := yataiclient.DynamoAuthHeaders{
OrgId: orgId,
UserId: userId,
if apiStoreConf.ClusterName == "" {
apiStoreConf.ClusterName = "default"
}
client, yataiConf, err := r.getYataiClient(ctx)
if err != nil {
return nil, nil, err
}
apiStoreClient := apiStoreClient.NewApiStoreClient(apiStoreConf.Endpoint)
(*client).SetAuth(auth)
return client, yataiConf, err
return apiStoreClient, apiStoreConf, nil
}
//nolint:nakedret
......@@ -881,28 +832,19 @@ func (r *DynamoNimRequestReconciler) getDockerRegistry(ctx context.Context, dyna
return
}
dynamoNimRepositoryName := "yatai-bentos"
modelRepositoryName := "yatai-models"
if dockerRegistryConfig.BentoRepositoryName != "" {
dynamoNimRepositoryName = dockerRegistryConfig.BentoRepositoryName
}
if dockerRegistryConfig.ModelRepositoryName != "" {
modelRepositoryName = dockerRegistryConfig.ModelRepositoryName
dynamoRepositoryName := "dynamo-components"
if dockerRegistryConfig.DynamoComponentsRepositoryName != "" {
dynamoRepositoryName = dockerRegistryConfig.DynamoComponentsRepositoryName
}
dynamoNimRepositoryURI := fmt.Sprintf("%s/%s", strings.TrimRight(dockerRegistryConfig.Server, "/"), dynamoNimRepositoryName)
modelRepositoryURI := fmt.Sprintf("%s/%s", strings.TrimRight(dockerRegistryConfig.Server, "/"), modelRepositoryName)
dynamoRepositoryURI := fmt.Sprintf("%s/%s", strings.TrimRight(dockerRegistryConfig.Server, "/"), dynamoRepositoryName)
if strings.Contains(dockerRegistryConfig.Server, "docker.io") {
dynamoNimRepositoryURI = fmt.Sprintf("docker.io/%s", dynamoNimRepositoryName)
modelRepositoryURI = fmt.Sprintf("docker.io/%s", modelRepositoryName)
dynamoRepositoryURI = fmt.Sprintf("docker.io/%s", dynamoRepositoryName)
}
dynamoNimRepositoryInClusterURI := dynamoNimRepositoryURI
modelRepositoryInClusterURI := modelRepositoryURI
dynamoRepositoryInClusterURI := dynamoRepositoryURI
if dockerRegistryConfig.InClusterServer != "" {
dynamoNimRepositoryInClusterURI = fmt.Sprintf("%s/%s", strings.TrimRight(dockerRegistryConfig.InClusterServer, "/"), dynamoNimRepositoryName)
modelRepositoryInClusterURI = fmt.Sprintf("%s/%s", strings.TrimRight(dockerRegistryConfig.InClusterServer, "/"), modelRepositoryName)
dynamoRepositoryInClusterURI = fmt.Sprintf("%s/%s", strings.TrimRight(dockerRegistryConfig.InClusterServer, "/"), dynamoRepositoryName)
if strings.Contains(dockerRegistryConfig.InClusterServer, "docker.io") {
dynamoNimRepositoryInClusterURI = fmt.Sprintf("docker.io/%s", dynamoNimRepositoryName)
modelRepositoryInClusterURI = fmt.Sprintf("docker.io/%s", modelRepositoryName)
dynamoRepositoryInClusterURI = fmt.Sprintf("docker.io/%s", dynamoRepositoryName)
}
}
dockerRegistry = schemas.DockerRegistrySchema{
......@@ -910,10 +852,8 @@ func (r *DynamoNimRequestReconciler) getDockerRegistry(ctx context.Context, dyna
Username: dockerRegistryConfig.Username,
Password: dockerRegistryConfig.Password,
Secure: dockerRegistryConfig.Secure,
BentosRepositoryURI: dynamoNimRepositoryURI,
BentosRepositoryURIInCluster: dynamoNimRepositoryInClusterURI,
ModelsRepositoryURI: modelRepositoryURI,
ModelsRepositoryURIInCluster: modelRepositoryInClusterURI,
DynamoRepositoryURI: dynamoRepositoryURI,
DynamoRepositoryURIInCluster: dynamoRepositoryInClusterURI,
}
return
......@@ -927,7 +867,7 @@ func getDynamoNimImagePrefix(dynamoNimRequest *nvidiacomv1alpha1.DynamoNimReques
if dynamoNimRequest == nil {
return ""
}
prefix, exist := dynamoNimRequest.Annotations[KubeAnnotationDynamoNimStorageNS]
prefix, exist := dynamoNimRequest.Annotations[consts.KubeAnnotationDynamoNimStorageNS]
if exist && prefix != "" {
return fmt.Sprintf("%s.", prefix)
}
......@@ -943,37 +883,29 @@ func getDynamoNimImageName(dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest,
}
var uri, tag string
if inCluster {
uri = dockerRegistry.BentosRepositoryURIInCluster
uri = dockerRegistry.DynamoRepositoryURIInCluster
} else {
uri = dockerRegistry.BentosRepositoryURI
uri = dockerRegistry.DynamoRepositoryURI
}
tail := fmt.Sprintf("%s.%s", dynamoNimRepositoryName, dynamoNimVersion)
separateModels := isSeparateModels(dynamoNimRequest)
if separateModels {
tail += ".nomodels"
}
if isEstargzEnabled() {
tail += ".esgz"
}
tag = fmt.Sprintf("yatai.%s%s", getDynamoNimImagePrefix(dynamoNimRequest), tail)
tag = fmt.Sprintf("dynamo.%s%s", getDynamoNimImagePrefix(dynamoNimRequest), tail)
if len(tag) > 128 {
hashStr := hash(tail)
tag = fmt.Sprintf("yatai.%s%s", getDynamoNimImagePrefix(dynamoNimRequest), hashStr)
tag = fmt.Sprintf("dynamo.%s%s", getDynamoNimImagePrefix(dynamoNimRequest), hashStr)
if len(tag) > 128 {
tag = fmt.Sprintf("yatai.%s", hash(fmt.Sprintf("%s%s", getDynamoNimImagePrefix(dynamoNimRequest), tail)))[:128]
tag = fmt.Sprintf("dynamo.%s", hash(fmt.Sprintf("%s%s", getDynamoNimImagePrefix(dynamoNimRequest), tail)))[:128]
}
}
return fmt.Sprintf("%s:%s", uri, tag)
}
func isSeparateModels(dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest) (separateModels bool) {
return dynamoNimRequest.Annotations[commonconsts.KubeAnnotationYataiImageBuilderSeparateModels] == commonconsts.KubeLabelValueTrue
}
func checkImageExists(dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest, dockerRegistry schemas.DockerRegistrySchema, imageName string) (bool, error) {
if dynamoNimRequest.Annotations["yatai.ai/force-build-image"] == commonconsts.KubeLabelValueTrue {
if dynamoNimRequest.Annotations["nvidia.com/force-build-image"] == commonconsts.KubeLabelValueTrue {
return false, nil
}
......@@ -1035,7 +967,7 @@ func (r *DynamoNimRequestReconciler) getImageInfo(ctx context.Context, opt GetIm
imageInfo.DockerConfigJSONSecretName = opt.DynamoNimRequest.Spec.DockerConfigJSONSecretName
imageInfo.DockerRegistryInsecure = opt.DynamoNimRequest.Annotations[commonconsts.KubeAnnotationDockerRegistryInsecure] == "true"
imageInfo.DockerRegistryInsecure = opt.DynamoNimRequest.Annotations[commonconsts.KubeAnnotationDynamoDockerRegistryInsecure] == "true"
if opt.DynamoNimRequest.Spec.OCIRegistryInsecure != nil {
imageInfo.DockerRegistryInsecure = *opt.DynamoNimRequest.Spec.OCIRegistryInsecure
}
......@@ -1063,61 +995,54 @@ func (r *DynamoNimRequestReconciler) getImageInfo(ctx context.Context, opt GetIm
return
}
func (r *DynamoNimRequestReconciler) getDynamoNim(ctx context.Context, dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest) (dynamoNim *schemas.DynamoNIM, err error) {
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoNimRequest.Spec.BentoTag, ":")
func (r *DynamoNimRequestReconciler) getDynamoComponent(ctx context.Context, dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest) (dynamoComponent *schemas.DynamoComponent, err error) {
dynamoComponentRepositoryName, _, dynamoComponentVersion := xstrings.Partition(dynamoNimRequest.Spec.BentoTag, ":")
yataiClient_, _, err := r.getYataiClient(ctx)
apiStoreClient, _, err := r.getApiStoreClient(ctx)
if err != nil {
err = errors.Wrap(err, "get yatai client")
err = errors.Wrap(err, "get api store client")
return
}
if yataiClient_ == nil {
err = errors.New("can't get yatai client, please check yatai configuration")
if apiStoreClient == nil {
err = errors.New("can't get api store client, please check api store configuration")
return
}
yataiClient := *yataiClient_
r.Recorder.Eventf(dynamoNimRequest, corev1.EventTypeNormal, "FetchDynamoNim", "Getting dynamoNim %s from yatai service", dynamoNimRequest.Spec.BentoTag)
dynamoNim, err = yataiClient.GetBento(ctx, dynamoNimRepositoryName, dynamoNimVersion)
r.Recorder.Eventf(dynamoNimRequest, corev1.EventTypeNormal, "FetchDynamoComponent", "Getting dynamo component %s from api store service", dynamoNimRequest.Spec.BentoTag)
dynamoComponent, err = apiStoreClient.GetDynamoComponent(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
if err != nil {
err = errors.Wrap(err, "get dynamoNim")
err = errors.Wrap(err, "get dynamo component")
return
}
r.Recorder.Eventf(dynamoNimRequest, corev1.EventTypeNormal, "FetchDynamoNim", "Got dynamoNim %s from yatai service", dynamoNimRequest.Spec.BentoTag)
r.Recorder.Eventf(dynamoNimRequest, corev1.EventTypeNormal, "FetchDynamoComponent", "Got dynamo component %s from api store service", dynamoNimRequest.Spec.BentoTag)
return
}
func (r *DynamoNimRequestReconciler) getImageBuilderJobName() string {
guid := xid.New()
return fmt.Sprintf("yatai-dynamonim-image-builder-%s", guid.String())
return fmt.Sprintf("dynamo-image-builder-%s", guid.String())
}
func (r *DynamoNimRequestReconciler) getImageBuilderJobLabels(dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest) map[string]string {
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoNimRequest.Spec.BentoTag, ":")
labels := map[string]string{
commonconsts.KubeLabelBentoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelIsBentoImageBuilder: "true",
commonconsts.KubeLabelYataiBentoRepository: dynamoNimRepositoryName,
commonconsts.KubeLabelYataiBento: dynamoNimVersion,
commonconsts.KubeLabelDynamoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelIsDynamoImageBuilder: "true",
commonconsts.KubeLabelDynamoRepository: dynamoNimRepositoryName,
commonconsts.KubeLabelDynamoVersion: dynamoNimVersion,
}
if isSeparateModels(dynamoNimRequest) {
labels[KubeLabelYataiImageBuilderSeparateModels] = commonconsts.KubeLabelValueTrue
} else {
labels[KubeLabelYataiImageBuilderSeparateModels] = commonconsts.KubeLabelValueFalse
}
return labels
}
func (r *DynamoNimRequestReconciler) getImageBuilderPodLabels(dynamoNimRequest *nvidiacomv1alpha1.DynamoNimRequest) map[string]string {
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoNimRequest.Spec.BentoTag, ":")
return map[string]string{
commonconsts.KubeLabelBentoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelIsBentoImageBuilder: "true",
commonconsts.KubeLabelYataiBentoRepository: dynamoNimRepositoryName,
commonconsts.KubeLabelYataiBento: dynamoNimVersion,
commonconsts.KubeLabelDynamoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelIsDynamoImageBuilder: "true",
commonconsts.KubeLabelDynamoRepository: dynamoNimRepositoryName,
commonconsts.KubeLabelDynamoVersion: dynamoNimVersion,
}
}
......@@ -1147,7 +1072,7 @@ func (r *DynamoNimRequestReconciler) generateImageBuilderJob(ctx context.Context
err = errors.Wrap(err, "failed to get hash string")
return
}
kubeAnnotations[KubeAnnotationDynamoNimRequestHash] = hashStr
kubeAnnotations[consts.KubeAnnotationDynamoNimRequestHash] = hashStr
job = &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: r.getImageBuilderJobName(),
......@@ -1196,7 +1121,7 @@ func injectPodAffinity(podSpec *corev1.PodSpec, dynamoNimRequest *nvidiacomv1alp
PodAffinityTerm: corev1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
commonconsts.KubeLabelBentoRequest: dynamoNimRequest.Name,
commonconsts.KubeLabelDynamoRequest: dynamoNimRequest.Name,
},
},
TopologyKey: corev1.LabelHostname,
......@@ -1216,7 +1141,7 @@ type GenerateImageBuilderPodTemplateSpecOption struct {
//nolint:gocyclo,nakedret
func (r *DynamoNimRequestReconciler) generateImageBuilderPodTemplateSpec(ctx context.Context, opt GenerateImageBuilderPodTemplateSpecOption) (pod *corev1.PodTemplateSpec, err error) {
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(opt.DynamoNimRequest.Spec.BentoTag, ":")
dynamoComponentRepositoryName, _, dynamoComponentVersion := xstrings.Partition(opt.DynamoNimRequest.Spec.BentoTag, ":")
kubeLabels := r.getImageBuilderPodLabels(opt.DynamoNimRequest)
inClusterImageName := opt.ImageInfo.InClusterImageName
......@@ -1227,7 +1152,7 @@ func (r *DynamoNimRequestReconciler) generateImageBuilderPodTemplateSpec(ctx con
volumes := []corev1.Volume{
{
Name: "yatai",
Name: "dynamo",
VolumeSource: corev1.VolumeSource{
EmptyDir: &corev1.EmptyDirVolumeSource{},
},
......@@ -1242,8 +1167,8 @@ func (r *DynamoNimRequestReconciler) generateImageBuilderPodTemplateSpec(ctx con
volumeMounts := []corev1.VolumeMount{
{
Name: "yatai",
MountPath: "/yatai",
Name: "dynamo",
MountPath: "/dynamo",
},
{
Name: "workspace",
......@@ -1272,92 +1197,46 @@ func (r *DynamoNimRequestReconciler) generateImageBuilderPodTemplateSpec(ctx con
})
}
var dynamoNim *schemas.DynamoNIM
yataiAPITokenSecretName := ""
dynamoNimDownloadURL := opt.DynamoNimRequest.Spec.DownloadURL
dynamoNimDownloadHeader := ""
var dynamoComponent *schemas.DynamoComponent
dynamoComponentDownloadURL := opt.DynamoNimRequest.Spec.DownloadURL
if dynamoNimDownloadURL == "" {
var yataiClient_ **yataiclient.YataiClient
var yataiConf_ **commonconfig.YataiConfig
if dynamoComponentDownloadURL == "" {
var apiStoreClient *apiStoreClient.ApiStoreClient
var apiStoreConf *commonconfig.ApiStoreConfig
yataiClient_, yataiConf_, err = r.getYataiClientWithAuth(ctx, opt.DynamoNimRequest)
apiStoreClient, apiStoreConf, err = r.getApiStoreClient(ctx)
if err != nil {
err = errors.Wrap(err, "get yatai client")
err = errors.Wrap(err, "get api store client")
return
}
if yataiClient_ == nil || yataiConf_ == nil {
err = errors.New("can't get yatai client, please check yatai configuration")
if apiStoreClient == nil || apiStoreConf == nil {
err = errors.New("can't get api store client, please check api store configuration")
return
}
yataiClient := *yataiClient_
yataiConf := *yataiConf_
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting dynamoNim %s from yatai service", opt.DynamoNimRequest.Spec.BentoTag)
dynamoNim, err = yataiClient.GetBento(ctx, dynamoNimRepositoryName, dynamoNimVersion)
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting dynamoNim %s from api store service", opt.DynamoNimRequest.Spec.BentoTag)
dynamoComponent, err = apiStoreClient.GetDynamoComponent(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
if err != nil {
err = errors.Wrap(err, "get dynamoNim")
return
}
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamoNim %s from yatai service", opt.DynamoNimRequest.Spec.BentoTag)
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamoNim %s from api store service", opt.DynamoNimRequest.Spec.BentoTag)
if dynamoNim.TransmissionStrategy != nil && *dynamoNim.TransmissionStrategy == schemas.TransmissionStrategyPresignedURL {
var dynamoNim_ *schemas.DynamoNIM
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamoNim %s from yatai service", opt.DynamoNimRequest.Spec.BentoTag)
dynamoNim_, err = yataiClient.PresignBentoDownloadURL(ctx, dynamoNimRepositoryName, dynamoNimVersion)
if dynamoComponent.TransmissionStrategy != nil && *dynamoComponent.TransmissionStrategy == schemas.TransmissionStrategyPresignedURL {
var dynamoComponent_ *schemas.DynamoComponent
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamoNim %s from api store service", opt.DynamoNimRequest.Spec.BentoTag)
dynamoComponent_, err = apiStoreClient.PresignDynamoComponentDownloadURL(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
if err != nil {
err = errors.Wrap(err, "presign dynamoNim download url")
return
}
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamoNim %s from yatai service", opt.DynamoNimRequest.Spec.BentoTag)
dynamoNimDownloadURL = dynamoNim_.PresignedDownloadUrl
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamoNim %s from api store service", opt.DynamoNimRequest.Spec.BentoTag)
dynamoComponentDownloadURL = dynamoComponent_.PresignedDownloadUrl
} else {
dynamoNimDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_nims/%s/versions/%s/download", yataiConf.Endpoint, dynamoNimRepositoryName, dynamoNimVersion)
dynamoNimDownloadHeader = fmt.Sprintf("%s: %s:%s:$%s", commonconsts.YataiApiTokenHeaderName, commonconsts.YataiImageBuilderComponentName, yataiConf.ClusterName, commonconsts.EnvYataiApiToken)
}
// nolint: gosec
yataiAPITokenSecretName = "yatai-api-token"
yataiAPITokenSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: yataiAPITokenSecretName,
Namespace: opt.DynamoNimRequest.Namespace,
},
StringData: map[string]string{
commonconsts.EnvYataiApiToken: yataiConf.ApiToken,
},
dynamoComponentDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_nims/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
}
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting secret %s in namespace %s", yataiAPITokenSecretName, opt.DynamoNimRequest.Namespace)
_yataiAPITokenSecret := &corev1.Secret{}
err = r.Get(ctx, types.NamespacedName{Namespace: opt.DynamoNimRequest.Namespace, Name: yataiAPITokenSecretName}, _yataiAPITokenSecret)
isNotFound := k8serrors.IsNotFound(err)
if err != nil && !isNotFound {
err = errors.Wrapf(err, "failed to get secret %s", yataiAPITokenSecretName)
return
}
if isNotFound {
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Secret %s is not found, so creating it in namespace %s", yataiAPITokenSecretName, opt.DynamoNimRequest.Namespace)
err = r.Create(ctx, yataiAPITokenSecret)
isExists := k8serrors.IsAlreadyExists(err)
if err != nil && !isExists {
err = errors.Wrapf(err, "failed to create secret %s", yataiAPITokenSecretName)
return
}
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Secret %s is created in namespace %s", yataiAPITokenSecretName, opt.DynamoNimRequest.Namespace)
} else {
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Secret %s is found in namespace %s, so updating it", yataiAPITokenSecretName, opt.DynamoNimRequest.Namespace)
err = r.Update(ctx, yataiAPITokenSecret)
if err != nil {
err = errors.Wrapf(err, "failed to update secret %s", yataiAPITokenSecretName)
return
}
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Secret %s is updated in namespace %s", yataiAPITokenSecretName, opt.DynamoNimRequest.Namespace)
}
}
internalImages := commonconfig.GetInternalImages()
logrus.Infof("Image builder is using the images %v", *internalImages)
......@@ -1370,8 +1249,8 @@ func (r *DynamoNimRequestReconciler) generateImageBuilderPodTemplateSpec(ctx con
set -e
mkdir -p /workspace/buildcontext
url="{{.DynamoNimDownloadURL}}"
echo "Downloading dynamoNim {{.DynamoNimRepositoryName}}:{{.DynamoNimVersion}} to /tmp/downloaded.tar..."
url="{{.DynamoComponentDownloadURL}}"
echo "Downloading dynamoNim {{.DynamoComponentRepositoryName}}:{{.DynamoComponentVersion}} to /tmp/downloaded.tar..."
if [[ ${url} == s3://* ]]; then
echo "Downloading from s3..."
aws s3 cp ${url} /tmp/downloaded.tar
......@@ -1379,7 +1258,7 @@ elif [[ ${url} == gs://* ]]; then
echo "Downloading from GCS..."
gsutil cp ${url} /tmp/downloaded.tar
else
curl --fail -L -H "{{.DynamoNimDownloadHeader}}" ${url} --output /tmp/downloaded.tar --progress-bar
curl --fail -L ${url} --output /tmp/downloaded.tar --progress-bar
fi
cd /workspace/buildcontext
echo "Extracting dynamoNim tar file..."
......@@ -1401,11 +1280,10 @@ echo "Done"
var dynamoNimDownloadCommandBuffer bytes.Buffer
err = dynamoNimDownloadCommandTemplate.Execute(&dynamoNimDownloadCommandBuffer, map[string]interface{}{
"DynamoNimDownloadURL": dynamoNimDownloadURL,
"DynamoNimDownloadHeader": dynamoNimDownloadHeader,
"DynamoNimRepositoryName": dynamoNimRepositoryName,
"DynamoNimVersion": dynamoNimVersion,
"Privileged": privileged,
"DynamoComponentDownloadURL": dynamoComponentDownloadURL,
"DynamoComponentRepositoryName": dynamoComponentRepositoryName,
"DynamoComponentVersion": dynamoComponentVersion,
"Privileged": privileged,
})
if err != nil {
err = errors.Wrap(err, "failed to execute download command template")
......@@ -1427,20 +1305,10 @@ echo "Done"
downloaderContainerEnvFrom := opt.DynamoNimRequest.Spec.DownloaderContainerEnvFrom
if yataiAPITokenSecretName != "" {
downloaderContainerEnvFrom = append(downloaderContainerEnvFrom, corev1.EnvFromSource{
SecretRef: &corev1.SecretEnvSource{
LocalObjectReference: corev1.LocalObjectReference{
Name: yataiAPITokenSecretName,
},
},
})
}
initContainers := []corev1.Container{
{
Name: "dynamonim-downloader",
Image: internalImages.BentoDownloader,
Image: internalImages.DynamoComponentsDownloader,
Command: []string{
"bash",
"-c",
......@@ -1466,8 +1334,8 @@ echo "Done"
modelsSeen[model.Tag] = struct{}{}
}
if dynamoNim != nil {
for _, modelTag := range dynamoNim.Manifest.Models {
if dynamoComponent != nil {
for _, modelTag := range dynamoComponent.Manifest.Models {
if _, ok := modelsSeen[modelTag]; !ok {
models = append(models, nvidiacomv1alpha1.BentoModel{
Tag: modelTag,
......@@ -1483,13 +1351,13 @@ echo "Done"
var buildArgs []string
var builderArgs []string
configNamespace, err := commonconfig.GetYataiImageBuilderNamespace(ctx)
configNamespace, err := commonconfig.GetDynamoImageBuilderNamespace(ctx)
if err != nil {
err = errors.Wrap(err, "failed to get Yatai image builder namespace")
err = errors.Wrap(err, "failed to get dynamo image builder namespace")
return
}
configCmName := "yatai-image-builder-config"
configCmName := "dynamo-image-builder-config"
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting configmap %s from namespace %s", configCmName, configNamespace)
configCm := &corev1.ConfigMap{}
err = r.Get(ctx, types.NamespacedName{Name: configCmName, Namespace: configNamespace}, configCm)
......@@ -1589,11 +1457,11 @@ echo "Done"
kanikoCacheRepo := os.Getenv("KANIKO_CACHE_REPO")
if kanikoCacheRepo == "" {
kanikoCacheRepo = opt.ImageInfo.DockerRegistry.BentosRepositoryURIInCluster
kanikoCacheRepo = opt.ImageInfo.DockerRegistry.DynamoRepositoryURIInCluster
}
kubeAnnotations := make(map[string]string)
kubeAnnotations[KubeAnnotationDynamoNimRequestImageBuiderHash] = opt.DynamoNimRequest.Annotations[KubeAnnotationDynamoNimRequestImageBuiderHash]
kubeAnnotations[consts.KubeAnnotationDynamoNimRequestImageBuiderHash] = opt.DynamoNimRequest.Annotations[consts.KubeAnnotationDynamoNimRequestImageBuiderHash]
command := []string{
"/kaniko/executor",
......@@ -1710,10 +1578,10 @@ echo "Done"
}
// add other arguments to builder
args = append(args, builderArgs...)
logrus.Info("yatai-image-builder args: ", args)
logrus.Info("dynamo-image-builder args: ", args)
// nolint: gosec
buildArgsSecretName := "yatai-image-builder-build-args"
buildArgsSecretName := "dynamo-image-builder-build-args"
r.Recorder.Eventf(opt.DynamoNimRequest, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting secret %s from namespace %s", buildArgsSecretName, configNamespace)
buildArgsSecret := &corev1.Secret{}
err = r.Get(ctx, types.NamespacedName{Name: buildArgsSecretName, Namespace: configNamespace}, buildArgsSecret)
......@@ -1896,7 +1764,7 @@ echo "Done"
if pod.Spec.ServiceAccountName == "" {
serviceAccounts := &corev1.ServiceAccountList{}
err = r.List(ctx, serviceAccounts, client.InNamespace(opt.DynamoNimRequest.Namespace), client.MatchingLabels{
commonconsts.KubeLabelYataiImageBuilderPod: commonconsts.KubeLabelValueTrue,
commonconsts.KubeLabelDynamoImageBuilderPod: commonconsts.KubeLabelValueTrue,
})
if err != nil {
err = errors.Wrapf(err, "failed to list service accounts in namespace %s", opt.DynamoNimRequest.Namespace)
......
......@@ -15,7 +15,7 @@
* limitations under the License.
*/
package nim
package dynamo
import (
"bytes"
......@@ -26,12 +26,11 @@ import (
"strings"
"emperror.dev/errors"
apiStoreClient "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/api_store_client"
compounaiCommon "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
yataiclient "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/yatai-client"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1"
commonconfig "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/config"
commonconsts "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/consts"
"github.com/huandu/xstrings"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
......@@ -89,58 +88,53 @@ func GetDefaultDynamoNamespace(ctx context.Context, dynamoDeployment *v1alpha1.D
return fmt.Sprintf("dynamo-%s", dynamoDeployment.Name)
}
func RetrieveDynamoNimDownloadURL(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, recorder EventRecorder) (*string, *string, error) {
dynamoNimDownloadURL := ""
dynamoNimApiToken := ""
var dynamoNim *schemas.DynamoNIM
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoDeployment.Spec.DynamoNim, ":")
func RetrieveDynamoGraphDownloadURL(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, recorder EventRecorder) (*string, error) {
dynamoGraphDownloadURL := ""
var dynamoComponent *schemas.DynamoComponent
dynamoComponentRepositoryName, _, dynamoComponentVersion := xstrings.Partition(dynamoDeployment.Spec.DynamoNim, ":")
var err error
var yataiClient_ **yataiclient.YataiClient
var yataiConf_ **commonconfig.YataiConfig
var apiStoreClient *apiStoreClient.ApiStoreClient
var apiStoreConf *commonconfig.ApiStoreConfig
yataiClient_, yataiConf_, err = GetYataiClient(ctx)
apiStoreClient, apiStoreConf, err = GetApiStoreClient(ctx)
if err != nil {
err = errors.Wrap(err, "get yatai client")
return nil, nil, err
err = errors.Wrap(err, "get api store client")
return nil, err
}
if yataiClient_ == nil || yataiConf_ == nil {
err = errors.New("can't get yatai client, please check yatai configuration")
return nil, nil, err
if apiStoreClient == nil || apiStoreConf == nil {
err = errors.New("can't get api store client, please check api store configuration")
return nil, err
}
yataiClient := *yataiClient_
yataiConf := *yataiConf_
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim)
dynamoNim, err = yataiClient.GetBento(ctx, dynamoNimRepositoryName, dynamoNimVersion)
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting dynamo graph %s from api store service", dynamoDeployment.Spec.DynamoNim)
dynamoComponent, err = apiStoreClient.GetDynamoComponent(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
if err != nil {
err = errors.Wrap(err, "get dynamoNim")
return nil, nil, err
err = errors.Wrap(err, "get dynamo component")
return nil, err
}
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim)
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamo graph %s from api store service", dynamoDeployment.Spec.DynamoNim)
if dynamoNim.TransmissionStrategy != nil && *dynamoNim.TransmissionStrategy == schemas.TransmissionStrategyPresignedURL {
var dynamoNim_ *schemas.DynamoNIM
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim)
dynamoNim_, err = yataiClient.PresignBentoDownloadURL(ctx, dynamoNimRepositoryName, dynamoNimVersion)
if dynamoComponent.TransmissionStrategy != nil && *dynamoComponent.TransmissionStrategy == schemas.TransmissionStrategyPresignedURL {
var dynamoComponent_ *schemas.DynamoComponent
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamo graph %s from api store service", dynamoDeployment.Spec.DynamoNim)
dynamoComponent_, err = apiStoreClient.PresignDynamoComponentDownloadURL(ctx, dynamoComponentRepositoryName, dynamoComponentVersion)
if err != nil {
err = errors.Wrap(err, "presign dynamoNim download url")
return nil, nil, err
err = errors.Wrap(err, "presign dynamo component download url")
return nil, err
}
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim)
dynamoNimDownloadURL = dynamoNim_.PresignedDownloadUrl
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got presigned url for dynamo graph %s from api store service", dynamoDeployment.Spec.DynamoNim)
dynamoGraphDownloadURL = dynamoComponent_.PresignedDownloadUrl
} else {
dynamoNimDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_nims/%s/versions/%s/download", yataiConf.Endpoint, dynamoNimRepositoryName, dynamoNimVersion)
dynamoNimApiToken = fmt.Sprintf("%s:%s:$%s", commonconsts.YataiImageBuilderComponentName, yataiConf.ClusterName, commonconsts.EnvYataiApiToken)
dynamoGraphDownloadURL = fmt.Sprintf("%s/api/v1/dynamo_nims/%s/versions/%s/download", apiStoreConf.Endpoint, dynamoComponentRepositoryName, dynamoComponentVersion)
}
return &dynamoNimDownloadURL, &dynamoNimApiToken, nil
return &dynamoGraphDownloadURL, nil
}
// ServicesConfig represents the top-level YAML structure of a dynamoNim yaml file stored in a dynamoNim tar file
type DynamoNIMConfig struct {
type DynamoGraphConfig struct {
DynamoTag string `yaml:"service"`
Services []ServiceConfig `yaml:"services"`
EntryService string `yaml:"entry_service"`
......@@ -150,12 +144,11 @@ type EventRecorder interface {
Eventf(obj runtime.Object, eventtype string, reason string, message string, args ...interface{})
}
func RetrieveDynamoNIMConfigurationFile(ctx context.Context, url string, yataiApiToken string) (*bytes.Buffer, error) {
func RetrieveDynamoGraphConfigurationFile(ctx context.Context, url string) (*bytes.Buffer, error) {
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set(commonconsts.YataiApiTokenHeaderName, yataiApiToken)
client := &http.Client{}
resp, err := client.Do(req)
......@@ -185,55 +178,53 @@ func RetrieveDynamoNIMConfigurationFile(ctx context.Context, url string, yataiAp
return yamlContent, nil
}
func GetYataiClient(ctx context.Context) (yataiClient **yataiclient.YataiClient, yataiConf **commonconfig.YataiConfig, err error) {
yataiConf_, err := commonconfig.GetYataiConfig(ctx)
func GetApiStoreClient(ctx context.Context) (*apiStoreClient.ApiStoreClient, *commonconfig.ApiStoreConfig, error) {
apiStoreConf, err := commonconfig.GetApiStoreConfig(ctx)
isNotFound := k8serrors.IsNotFound(err)
if err != nil && !isNotFound {
err = errors.Wrap(err, "get yatai config")
return
err = errors.Wrap(err, "get api store config")
return nil, nil, err
}
if isNotFound {
return
return nil, nil, errors.New("endpoint config not found")
}
if yataiConf_.Endpoint == "" {
return
if apiStoreConf.Endpoint == "" {
return nil, nil, errors.New("endpoint is empty")
}
if yataiConf_.ClusterName == "" {
yataiConf_.ClusterName = "default"
if apiStoreConf.ClusterName == "" {
apiStoreConf.ClusterName = "default"
}
yataiClient_ := yataiclient.NewYataiClient(yataiConf_.Endpoint, fmt.Sprintf("%s:%s:%s", commonconsts.YataiImageBuilderComponentName, yataiConf_.ClusterName, yataiConf_.ApiToken))
apiStoreClient := apiStoreClient.NewApiStoreClient(apiStoreConf.Endpoint)
yataiClient = &yataiClient_
yataiConf = &yataiConf_
return
return apiStoreClient, apiStoreConf, nil
}
func ParseDynamoNIMConfig(ctx context.Context, yamlContent *bytes.Buffer) (*DynamoNIMConfig, error) {
var config DynamoNIMConfig
func ParseDynamoGraphConfig(ctx context.Context, yamlContent *bytes.Buffer) (*DynamoGraphConfig, error) {
var config DynamoGraphConfig
logger := log.FromContext(ctx)
logger.Info("trying to parse dynamoNim config", "yamlContent", yamlContent.String())
logger.Info("trying to parse dynamo graph config", "yamlContent", yamlContent.String())
err := yaml.Unmarshal(yamlContent.Bytes(), &config)
return &config, err
}
func GetDynamoNIMConfig(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, recorder EventRecorder) (*DynamoNIMConfig, error) {
dynamoNimDownloadURL, dynamoNimApiToken, err := RetrieveDynamoNimDownloadURL(ctx, dynamoDeployment, recorder)
func GetDynamoGraphConfig(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, recorder EventRecorder) (*DynamoGraphConfig, error) {
dynamoGraphDownloadURL, err := RetrieveDynamoGraphDownloadURL(ctx, dynamoDeployment, recorder)
if err != nil {
return nil, err
}
yamlContent, err := RetrieveDynamoNIMConfigurationFile(ctx, *dynamoNimDownloadURL, *dynamoNimApiToken)
yamlContent, err := RetrieveDynamoGraphConfigurationFile(ctx, *dynamoGraphDownloadURL)
if err != nil {
return nil, err
}
return ParseDynamoNIMConfig(ctx, yamlContent)
return ParseDynamoGraphConfig(ctx, yamlContent)
}
// generate DynamoNIMDeployment from config
func GenerateDynamoNIMDeployments(ctx context.Context, parentDynamoDeployment *v1alpha1.DynamoDeployment, config *DynamoNIMConfig, ingressSpec *v1alpha1.IngressSpec) (map[string]*v1alpha1.DynamoNimDeployment, error) {
// GenerateDynamoComponentsDeployments generates a map of DynamoComponentDeployments from a DynamoGraphConfig
func GenerateDynamoComponentsDeployments(ctx context.Context, parentDynamoDeployment *v1alpha1.DynamoDeployment, config *DynamoGraphConfig, ingressSpec *v1alpha1.IngressSpec) (map[string]*v1alpha1.DynamoNimDeployment, error) {
dynamoServices := make(map[string]string)
deployments := make(map[string]*v1alpha1.DynamoNimDeployment)
for _, service := range config.Services {
......
......@@ -15,7 +15,7 @@
* limitations under the License.
*/
package nim
package dynamo
import (
"context"
......@@ -30,7 +30,7 @@ import (
func TestGenerateDynamoNIMDeployments(t *testing.T) {
type args struct {
parentDynamoDeployment *v1alpha1.DynamoDeployment
config *DynamoNIMConfig
config *DynamoGraphConfig
ingressSpec *v1alpha1.IngressSpec
}
tests := []struct {
......@@ -51,7 +51,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
DynamoNim: "dynamonim:ac4e234",
},
},
config: &DynamoNIMConfig{
config: &DynamoGraphConfig{
DynamoTag: "dynamonim:MyService1",
Services: []ServiceConfig{
{
......@@ -159,7 +159,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
DynamoNim: "dynamonim:ac4e234",
},
},
config: &DynamoNIMConfig{
config: &DynamoGraphConfig{
DynamoTag: "dynamonim:MyService2",
EntryService: "service1",
Services: []ServiceConfig{
......@@ -272,7 +272,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
DynamoNim: "dynamonim:ac4e234",
},
},
config: &DynamoNIMConfig{
config: &DynamoGraphConfig{
DynamoTag: "dynamonim:MyService2",
EntryService: "service1",
Services: []ServiceConfig{
......@@ -378,7 +378,7 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
DynamoNim: "dynamonim:ac4e234",
},
},
config: &DynamoNIMConfig{
config: &DynamoGraphConfig{
DynamoTag: "dynamonim:MyService3",
Services: []ServiceConfig{
{
......@@ -423,9 +423,9 @@ func TestGenerateDynamoNIMDeployments(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := gomega.NewGomegaWithT(t)
got, err := GenerateDynamoNIMDeployments(context.Background(), tt.args.parentDynamoDeployment, tt.args.config, tt.args.ingressSpec)
got, err := GenerateDynamoComponentsDeployments(context.Background(), tt.args.parentDynamoDeployment, tt.args.config, tt.args.ingressSpec)
if (err != nil) != tt.wantErr {
t.Errorf("GenerateDynamoNIMDeployments() error = %v, wantErr %v", err, tt.wantErr)
t.Errorf("GenerateDynamoComponentsDeployments() error = %v, wantErr %v", err, tt.wantErr)
return
}
g.Expect(got).To(gomega.Equal(tt.want))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment