"examples/git@developer.sourcefind.cn:OpenDAS/dynamo.git" did not exist on "f91d5488eab6343068b869393d9a7b47f3a58237"
Commit 99cc11e6 authored by julienmancuso's avatar julienmancuso Committed by GitHub
Browse files

feat: cleanup operator code (#529)

parent 53d30882
...@@ -94,7 +94,9 @@ dynamo-api-server: ...@@ -94,7 +94,9 @@ dynamo-api-server:
resource_scope: "user" resource_scope: "user"
image: image:
repository: gitlab-master.nvidia.com:5005/aire/microservices/compoundai/dynamo-api-server repository: gitlab-master.nvidia.com:5005/aire/microservices/compoundai/dynamo-api-server
tag: ${CI_COMMIT_SHA} # temporarily force to use old commit for api-server
tag: fccbb8777fbd2ac11dad4871c8a8ba6884525e07
#tag: ${CI_COMMIT_SHA}
pullPolicy: IfNotPresent pullPolicy: IfNotPresent
storeImage: storeImage:
repository: gitlab-master.nvidia.com:5005/aire/microservices/compoundai/dynamo-api-store repository: gitlab-master.nvidia.com:5005/aire/microservices/compoundai/dynamo-api-store
......
modelschemas and schemasv1 are from https://github.com/bentoml/yatai-schemas
common, yatai-client and conversion are from yatai-deployment operator
\ No newline at end of file
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package schemas
import (
"encoding/json"
"errors"
"time"
)
// DynamoNIM describes a Dynamo NIM as it is encoded in JSON: a presigned
// download URL, an optional transmission strategy and an optional manifest.
type DynamoNIM struct {
// PresignedDownloadUrl is carried under the "presigned_download_url" key.
PresignedDownloadUrl string `json:"presigned_download_url"`
// TransmissionStrategy is a pointer, so it is nil when the key is absent or null.
TransmissionStrategy *TransmissionStrategy `json:"transmission_strategy"`
// Manifest is a pointer, so it is nil when the key is absent or null.
Manifest *DynamoNIMManifest `json:"manifest"`
}
// TransmissionStrategy is a string-valued enum that is serialized as a plain JSON string.
// NOTE(review): presumably selects between handing out a presigned URL and
// proxying the download — confirm against the API server.
type TransmissionStrategy string
// Known TransmissionStrategy values.
const (
TransmissionStrategyPresignedURL TransmissionStrategy = "presigned_url"
TransmissionStrategyProxy TransmissionStrategy = "proxy"
)
// DynamoNIMManifest is the JSON manifest attached to a DynamoNIM.
type DynamoNIMManifest struct {
// BentomlVersion is carried under the "bentoml_version" key.
BentomlVersion string `json:"bentoml_version"`
// Models is carried under the "models" key.
// NOTE(review): the format of each entry is not visible here — confirm with the producer.
Models []string `json:"models"`
}
// Duration is a time.Duration that encodes to JSON as a human-readable string
// (for example "1m30s") rather than as an integer nanosecond count.
type Duration time.Duration

// MarshalJSON encodes d as a JSON string in time.Duration's String format.
func (d Duration) MarshalJSON() ([]byte, error) {
	return json.Marshal(time.Duration(d).String())
}

// UnmarshalJSON decodes d from either a JSON number, interpreted as a count of
// nanoseconds, or a JSON string in the syntax accepted by time.ParseDuration.
//
// A JSON null leaves d untouched and returns nil, following the encoding/json
// convention for Unmarshaler implementations. Any other JSON type, malformed
// JSON, or an unparsable duration string produces an error and leaves d
// unchanged.
func (d *Duration) UnmarshalJSON(b []byte) error {
	var v any
	if err := json.Unmarshal(b, &v); err != nil {
		return err
	}
	switch value := v.(type) {
	case nil:
		// Decoding into `any` yields nil only for JSON null: treat as a no-op.
		return nil
	case float64:
		// encoding/json decodes every JSON number into float64; any
		// fractional part is truncated by the conversion.
		*d = Duration(time.Duration(value))
	case string:
		parsed, err := time.ParseDuration(value)
		if err != nil {
			return err
		}
		*d = Duration(parsed)
	default:
		return errors.New("invalid duration")
	}
	return nil
}
// DeploymentStrategy is a string-valued enum naming a rollout strategy.
// NOTE(review): "RollingUpdate" and "Recreate" match the Kubernetes Deployment
// strategy names; how the other two values are interpreted is not visible
// here — confirm with the consumer.
type DeploymentStrategy string
// Known DeploymentStrategy values; each constant's value equals its name suffix.
const (
DeploymentStrategyRollingUpdate DeploymentStrategy = "RollingUpdate"
DeploymentStrategyRecreate DeploymentStrategy = "Recreate"
DeploymentStrategyRampedSlowRollout DeploymentStrategy = "RampedSlowRollout"
DeploymentStrategyBestEffortControlledRollout DeploymentStrategy = "BestEffortControlledRollout"
)
// DockerRegistrySchema is the JSON description of a Docker registry: repository
// URIs for bentos and models (each with an "InCluster" variant), the registry
// server, and its credentials. All keys are camelCase.
type DockerRegistrySchema struct {
BentosRepositoryURI string `json:"bentosRepositoryURI"`
ModelsRepositoryURI string `json:"modelsRepositoryURI"`
// NOTE(review): the "InCluster" variants are presumably the addresses to use
// from inside the Kubernetes cluster — confirm with the producer.
BentosRepositoryURIInCluster string `json:"bentosRepositoryURIInCluster"`
ModelsRepositoryURIInCluster string `json:"modelsRepositoryURIInCluster"`
Server string `json:"server"`
Username string `json:"username"`
// Password is serialized in clear text under the "password" key; avoid
// logging marshaled values of this type.
Password string `json:"password"`
Secure bool `json:"secure"`
}
/* /*
* SPDX-FileCopyrightText: Copyright (c) 2022 Atalaya Tech. Inc
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
* *
...@@ -13,6 +14,7 @@ ...@@ -13,6 +14,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
* Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
*/ */
package yataiclient package yataiclient
...@@ -20,13 +22,15 @@ package yataiclient ...@@ -20,13 +22,15 @@ package yataiclient
import ( import (
"context" "context"
"fmt" "fmt"
"strings"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/modelschemas" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemasv1" )
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/pkg/dynamo/consts" const (
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/pkg/dynamo/reqcli" YataiApiTokenHeaderName = "X-YATAI-API-TOKEN"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/pkg/dynamo/utils" NgcOrganizationHeaderName = "Nv-Ngc-Org"
NgcUserHeaderName = "Nv-Actor-Id"
) )
type DynamoAuthHeaders struct { type DynamoAuthHeaders struct {
...@@ -51,147 +55,28 @@ func (c *YataiClient) SetAuth(headers DynamoAuthHeaders) { ...@@ -51,147 +55,28 @@ func (c *YataiClient) SetAuth(headers DynamoAuthHeaders) {
c.headers = headers c.headers = headers
} }
func (c *YataiClient) getJSONReqBuilder() *reqcli.JsonRequestBuilder { func (c *YataiClient) getHeaders() map[string]string {
return reqcli.NewJsonRequestBuilder().Headers(map[string]string{ return map[string]string{
consts.YataiApiTokenHeaderName: c.apiToken, YataiApiTokenHeaderName: c.apiToken,
consts.NgcOrganizationHeaderName: c.headers.OrgId, NgcOrganizationHeaderName: c.headers.OrgId,
consts.NgcUserHeaderName: c.headers.UserId, NgcUserHeaderName: c.headers.UserId,
}) }
}
func (c *YataiClient) ListBentos(ctx context.Context, req schemasv1.ListQuerySchema) (bentos *schemasv1.BentoWithRepositoryListSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, "/api/v1/bentos", map[string]string{
"start": fmt.Sprintf("%d", req.Start),
"count": fmt.Sprintf("%d", req.Count),
"q": string(req.Q),
})
bentos = &schemasv1.BentoWithRepositoryListSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(bentos).Do(ctx)
return
}
func (c *YataiClient) ListImageBuildStatusUnsyncedBentos(ctx context.Context) (bentos []*schemasv1.BentoWithRepositorySchema, err error) {
url_ := utils.UrlJoin(c.endpoint, "/api/v1/image_build_status_unsynced_bentos")
bentos = []*schemasv1.BentoWithRepositorySchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(&bentos).Do(ctx)
return
}
func (c *YataiClient) UpdateBentoImageBuildStatusSyncingAt(ctx context.Context, bentoRepositoryName, bentoVersion string) (err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/update_image_build_status_syncing_at", bentoRepositoryName, bentoVersion))
_, err = c.getJSONReqBuilder().Method("PATCH").Url(url_).Do(ctx)
return
}
func (c *YataiClient) UpdateBentoImageBuildStatus(ctx context.Context, bentoRepositoryName, bentoVersion string, status modelschemas.ImageBuildStatus) (err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/update_image_build_status", bentoRepositoryName, bentoVersion))
_, err = c.getJSONReqBuilder().Method("PATCH").Payload(map[string]string{
"image_build_status": string(status),
}).Url(url_).Do(ctx)
return
}
func (c *YataiClient) GetBento(ctx context.Context, bentoRepositoryName, bentoVersion string) (bento *schemasv1.BentoFullSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/bento_repositories/%s/bentos/%s", bentoRepositoryName, bentoVersion))
bento = &schemasv1.BentoFullSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(bento).Do(ctx)
return
}
func (c *YataiClient) GetModel(ctx context.Context, modelRepositoryName, modelVersion string) (model *schemasv1.ModelFullSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/model_repositories/%s/models/%s", modelRepositoryName, modelVersion))
model = &schemasv1.ModelFullSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(model).Do(ctx)
return
}
func (c *YataiClient) GetBentoRepository(ctx context.Context, bentoRepositoryName string) (bentoRepository *schemasv1.BentoRepositorySchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/bento_repositories/%s", bentoRepositoryName))
bentoRepository = &schemasv1.BentoRepositorySchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(bentoRepository).Do(ctx)
return
}
func (c *YataiClient) GetDeployment(ctx context.Context, clusterName, namespace, deploymentName string) (deployment *schemasv1.DeploymentSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s/namespaces/%s/deployments/%s", clusterName, namespace, deploymentName))
deployment = &schemasv1.DeploymentSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(deployment).Do(ctx)
return
}
func (c *YataiClient) SyncDeploymentStatus(ctx context.Context, clusterName, namespace, deploymentName string) (deployment *schemasv1.DeploymentSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s/namespaces/%s/deployments/%s/sync_status", clusterName, namespace, deploymentName))
deployment = &schemasv1.DeploymentSchema{}
_, err = c.getJSONReqBuilder().Method("POST").Url(url_).Result(deployment).Do(ctx)
return
}
func (c *YataiClient) CreateDeployment(ctx context.Context, clusterName string, schema *schemasv1.CreateDeploymentSchema) (deployment *schemasv1.DeploymentSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s/deployments", clusterName))
deployment = &schemasv1.DeploymentSchema{}
_, err = c.getJSONReqBuilder().Method("POST").Url(url_).Payload(schema).Result(deployment).Do(ctx)
return
}
func (c *YataiClient) UpdateDeployment(ctx context.Context, clusterName, namespace, deploymentName string, schema *schemasv1.UpdateDeploymentSchema) (deployment *schemasv1.DeploymentSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s/namespaces/%s/deployments/%s", clusterName, namespace, deploymentName))
deployment = &schemasv1.DeploymentSchema{}
_, err = c.getJSONReqBuilder().Method("PATCH").Url(url_).Payload(schema).Result(deployment).Do(ctx)
return
}
func (c *YataiClient) GetDockerRegistryRef(ctx context.Context, clusterName string) (registryRef *modelschemas.DockerRegistryRefSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s/docker_registry_ref", clusterName))
registryRef = &modelschemas.DockerRegistryRefSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(registryRef).Do(ctx)
return
}
func (c *YataiClient) GetMajorCluster(ctx context.Context) (cluster *schemasv1.ClusterFullSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, "/api/v1/current_org/major_cluster")
cluster = &schemasv1.ClusterFullSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(cluster).Do(ctx)
return
}
func (c *YataiClient) GetVersion(ctx context.Context) (version *schemasv1.VersionSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, "/api/v1/version")
version = &schemasv1.VersionSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(version).Do(ctx)
return
}
func (c *YataiClient) GetOrganization(ctx context.Context) (organization *schemasv1.OrganizationFullSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, "/api/v1/current_org")
organization = &schemasv1.OrganizationFullSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(organization).Do(ctx)
return
}
func (c *YataiClient) GetCluster(ctx context.Context, clusterName string) (cluster *schemasv1.ClusterFullSchema, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s", clusterName))
cluster = &schemasv1.ClusterFullSchema{}
_, err = c.getJSONReqBuilder().Method("GET").Url(url_).Result(cluster).Do(ctx)
return
} }
func (c *YataiClient) RegisterYataiComponent(ctx context.Context, clusterName string, schema *schemasv1.RegisterYataiComponentSchema) (yataiComponent *schemasv1.YataiComponentSchema, err error) { func (c *YataiClient) GetBento(ctx context.Context, bentoRepositoryName, bentoVersion string) (bento *schemas.DynamoNIM, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/clusters/%s/yatai_components", clusterName)) url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/bento_repositories/%s/bentos/%s", bentoRepositoryName, bentoVersion))
yataiComponent = &schemasv1.YataiComponentSchema{} bento = &schemas.DynamoNIM{}
_, err = c.getJSONReqBuilder().Method("POST").Url(url_).Payload(schema).Result(yataiComponent).Do(ctx) _, err = DoJsonRequest(ctx, "GET", url_, c.getHeaders(), nil, nil, bento, nil)
return return
} }
func (c *YataiClient) PresignBentoDownloadURL(ctx context.Context, bentoRepositoryName, bentoVersion string) (bento *schemasv1.BentoSchema, err error) { func (c *YataiClient) PresignBentoDownloadURL(ctx context.Context, bentoRepositoryName, bentoVersion string) (bento *schemas.DynamoNIM, err error) {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/presign_download_url", bentoRepositoryName, bentoVersion)) url_ := urlJoin(c.endpoint, fmt.Sprintf("/api/v1/dynamo_nims/%s/versions/%s/presign_download_url", bentoRepositoryName, bentoVersion))
bento = &schemasv1.BentoSchema{} bento = &schemas.DynamoNIM{}
_, err = c.getJSONReqBuilder().Method("PATCH").Url(url_).Result(bento).Do(ctx) _, err = DoJsonRequest(ctx, "PATCH", url_, c.getHeaders(), nil, nil, bento, nil)
return return
} }
func (c *YataiClient) PresignModelDownloadURL(ctx context.Context, modelRepositoryName, modelVersion string) (model *schemasv1.ModelSchema, err error) { func urlJoin(baseURL string, pathPart string) string {
url_ := utils.UrlJoin(c.endpoint, fmt.Sprintf("/api/v1/model_repositories/%s/models/%s/presign_download_url", modelRepositoryName, modelVersion)) return strings.TrimRight(baseURL, "/") + "/" + strings.TrimLeft(pathPart, "/")
model = &schemasv1.ModelSchema{}
_, err = c.getJSONReqBuilder().Method("PATCH").Url(url_).Result(model).Do(ctx)
return
} }
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package yataiclient
import (
"context"
"crypto/tls"
"fmt"
"time"
"resty.dev/v3"
)
// defaultClient caches the shared resty client built by GetDefaultClient.
var defaultClient *resty.Client

// GetDefaultClient returns the package-wide resty client, building it on the
// first call. The client is configured with a 90s timeout, up to 3 retries
// waiting between 2s and 10s, and a JSON Content-Type header.
//
// NOTE(review): the lazy initialization is not synchronized, so concurrent
// first calls can race on defaultClient.
// NOTE(review): InsecureSkipVerify disables TLS certificate verification for
// every request made through this client.
func GetDefaultClient() *resty.Client {
	if defaultClient != nil {
		return defaultClient
	}

	client := resty.New()
	client.SetTimeout(90 * time.Second)
	client.SetRetryCount(3)
	client.SetRetryWaitTime(2 * time.Second)
	client.SetRetryMaxWaitTime(10 * time.Second)
	client.SetHeader("Content-Type", "application/json")
	client.SetTLSClientConfig(&tls.Config{InsecureSkipVerify: true})

	defaultClient = client
	return defaultClient
}
// DoJsonRequest sends one HTTP request through the shared default client.
//
// method must be one of GET, POST, PUT, DELETE or PATCH. headers and query are
// applied to the request, payload is set as the request body, and a successful
// response is decoded into result.
//
// timeout, when non-nil, bounds this call only. It is applied as a context
// deadline instead of calling SetTimeout on the shared client, which would
// silently change the timeout of every later request and race with concurrent
// callers. The deadline covers the whole call including retries, and the
// client's own timeout still applies.
//
// It returns the HTTP status code, or 0 when the method is unsupported or the
// request failed before a response was received. A response that resty reports
// as an error (resp.IsError) yields its status code together with an error
// containing the response body.
func DoJsonRequest(ctx context.Context, method string, url string, headers map[string]string, query map[string]string, payload interface{}, result interface{}, timeout *time.Duration) (int, error) {
	if timeout != nil {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, *timeout)
		defer cancel()
	}

	req := GetDefaultClient().R().
		SetContext(ctx).
		SetBody(payload).
		SetResult(result).
		SetHeaders(headers).
		SetQueryParams(query)

	var resp *resty.Response
	var err error

	switch method {
	case "GET":
		resp, err = req.Get(url)
	case "POST":
		resp, err = req.Post(url)
	case "PUT":
		resp, err = req.Put(url)
	case "DELETE":
		resp, err = req.Delete(url)
	case "PATCH":
		resp, err = req.Patch(url)
	default:
		return 0, fmt.Errorf("unsupported method: %s", method)
	}

	if err != nil {
		return 0, fmt.Errorf("request error: %w", err)
	}

	if resp.IsError() {
		return resp.StatusCode(), fmt.Errorf("http %s %s failed with status %d: %s", method, url, resp.StatusCode(), resp.String())
	}

	return resp.StatusCode(), nil
}
...@@ -21,7 +21,7 @@ package v1alpha1 ...@@ -21,7 +21,7 @@ package v1alpha1
import ( import (
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common" dynamoCommon "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/modelschemas" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
...@@ -53,7 +53,7 @@ type DynamoNimRequestSpec struct { ...@@ -53,7 +53,7 @@ type DynamoNimRequestSpec struct {
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
Image string `json:"image,omitempty"` Image string `json:"image,omitempty"`
ImageBuildTimeout *modelschemas.Duration `json:"imageBuildTimeout,omitempty"` ImageBuildTimeout *schemas.Duration `json:"imageBuildTimeout,omitempty"`
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
BuildArgs []string `json:"buildArgs,omitempty"` BuildArgs []string `json:"buildArgs,omitempty"`
......
...@@ -25,7 +25,7 @@ package v1alpha1 ...@@ -25,7 +25,7 @@ package v1alpha1
import ( import (
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/modelschemas" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
"k8s.io/api/autoscaling/v2" "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1"
...@@ -570,7 +570,7 @@ func (in *DynamoNimRequestSpec) DeepCopyInto(out *DynamoNimRequestSpec) { ...@@ -570,7 +570,7 @@ func (in *DynamoNimRequestSpec) DeepCopyInto(out *DynamoNimRequestSpec) {
} }
if in.ImageBuildTimeout != nil { if in.ImageBuildTimeout != nil {
in, out := &in.ImageBuildTimeout, &out.ImageBuildTimeout in, out := &in.ImageBuildTimeout, &out.ImageBuildTimeout
*out = new(modelschemas.Duration) *out = new(schemas.Duration)
**out = **in **out = **in
} }
if in.BuildArgs != nil { if in.BuildArgs != nil {
......
...@@ -15,9 +15,7 @@ require ( ...@@ -15,9 +15,7 @@ require (
github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1 github.com/onsi/gomega v1.33.1
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/prometheus/common v0.55.0
github.com/prune998/docker-registry-client v0.0.0-20200114164314-f8cd511a014c github.com/prune998/docker-registry-client v0.0.0-20200114164314-f8cd511a014c
github.com/rs/xid v1.4.0 github.com/rs/xid v1.4.0
github.com/sergeymakinen/go-quote v1.1.0 github.com/sergeymakinen/go-quote v1.1.0
...@@ -30,6 +28,7 @@ require ( ...@@ -30,6 +28,7 @@ require (
k8s.io/apimachinery v0.31.3 k8s.io/apimachinery v0.31.3
k8s.io/client-go v0.31.3 k8s.io/client-go v0.31.3
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
resty.dev/v3 v3.0.0-beta.2
sigs.k8s.io/controller-runtime v0.19.4 sigs.k8s.io/controller-runtime v0.19.4
volcano.sh/apis v1.11.0 volcano.sh/apis v1.11.0
) )
...@@ -70,8 +69,10 @@ require ( ...@@ -70,8 +69,10 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.0 // indirect github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.19.1 // indirect github.com/prometheus/client_golang v1.19.1 // indirect
github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.55.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/pflag v1.0.5 // indirect
github.com/x448/float16 v0.8.4 // indirect github.com/x448/float16 v0.8.4 // indirect
......
...@@ -234,6 +234,8 @@ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7F ...@@ -234,6 +234,8 @@ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7F
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A=
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
resty.dev/v3 v3.0.0-beta.2 h1:xu4mGAdbCLuc3kbk7eddWfWm4JfhwDtdapwss5nCjnQ=
resty.dev/v3 v3.0.0-beta.2/go.mod h1:OgkqiPvTDtOuV4MGZuUDhwOpkY8enjOsjjMzeOHefy4=
sigs.k8s.io/controller-runtime v0.19.4 h1:SUmheabttt0nx8uJtoII4oIP27BVVvAKFvdvGFwV/Qo= sigs.k8s.io/controller-runtime v0.19.4 h1:SUmheabttt0nx8uJtoII4oIP27BVVvAKFvdvGFwV/Qo=
sigs.k8s.io/controller-runtime v0.19.4/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4= sigs.k8s.io/controller-runtime v0.19.4/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package config
import (
"context"
"os"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/consts"
)
// GetYataiImageBuilderNamespace reads the image-builder namespace from the
// environment variable named by consts.EnvYataiImageBuilderNamespace. The
// result is empty when the variable is unset; ctx is not used and the returned
// error is always nil.
func GetYataiImageBuilderNamespace(ctx context.Context) (namespace string, err error) {
	namespace = os.Getenv(consts.EnvYataiImageBuilderNamespace)
	return namespace, nil
}
// DockerRegistryConfig holds the Docker registry settings of the operator.
// GetDockerRegistryConfig fills it from environment variables; the yaml tags
// give the snake_case key of each field.
type DockerRegistryConfig struct {
BentoRepositoryName string `yaml:"bento_repository_name"`
ModelRepositoryName string `yaml:"model_repository_name"`
Server string `yaml:"server"`
InClusterServer string `yaml:"in_cluster_server"`
Username string `yaml:"username"`
// Password is held in clear text.
Password string `yaml:"password"`
Secure bool `yaml:"secure"`
}
// GetDockerRegistryConfig builds a DockerRegistryConfig from the
// DOCKER_REGISTRY_* environment variables named in the consts package. Unset
// variables yield empty strings. Secure is true only when its variable is
// exactly "true". The returned error is always nil.
func GetDockerRegistryConfig() (conf *DockerRegistryConfig, err error) {
	conf = new(DockerRegistryConfig)
	conf.BentoRepositoryName = os.Getenv(consts.EnvDockerRegistryBentoRepositoryName)
	conf.ModelRepositoryName = os.Getenv(consts.EnvDockerRegistryModelRepositoryName)
	conf.Server = os.Getenv(consts.EnvDockerRegistryServer)
	conf.InClusterServer = os.Getenv(consts.EnvDockerRegistryInClusterServer)
	conf.Username = os.Getenv(consts.EnvDockerRegistryUsername)
	conf.Password = os.Getenv(consts.EnvDockerRegistryPassword)
	conf.Secure = os.Getenv(consts.EnvDockerRegistrySecure) == "true"
	return conf, nil
}
// YataiConfig holds the Yatai connection settings: endpoint, cluster name and
// API token. GetYataiConfig fills it from environment variables.
type YataiConfig struct {
Endpoint string `yaml:"endpoint"`
ClusterName string `yaml:"cluster_name"`
// ApiToken is held in clear text.
ApiToken string `yaml:"api_token"`
}
// GetYataiConfig builds a YataiConfig from the environment variables named by
// consts.EnvYataiEndpoint, consts.EnvYataiClusterName and
// consts.EnvYataiApiToken. Unset variables yield empty strings; ctx is not
// used and the returned error is always nil.
func GetYataiConfig(ctx context.Context) (conf *YataiConfig, err error) {
	conf = new(YataiConfig)
	conf.Endpoint = os.Getenv(consts.EnvYataiEndpoint)
	conf.ClusterName = os.Getenv(consts.EnvYataiClusterName)
	conf.ApiToken = os.Getenv(consts.EnvYataiApiToken)
	return conf, nil
}
// getEnv returns the value of the environment variable key, or fallback when
// the variable is not set at all. A variable that is set to the empty string
// is returned as "" and does not trigger the fallback.
func getEnv(key, fallback string) string {
	value, found := os.LookupEnv(key)
	if !found {
		return fallback
	}
	return value
}
// InternalImages lists the container image references for the operator's
// helper components. GetInternalImages resolves each field from an
// INTERNAL_IMAGES_* environment variable with a built-in default.
type InternalImages struct {
BentoDownloader string
Kaniko string
MetricsTransformer string
Buildkit string
BuildkitRootless string
}
// GetInternalImages resolves every helper image reference from its
// INTERNAL_IMAGES_* environment variable, using the matching
// consts.InternalImages*Default value when the variable is not set.
func GetInternalImages() (conf *InternalImages) {
	return &InternalImages{
		BentoDownloader:    getEnv(consts.EnvInternalImagesBentoDownloader, consts.InternalImagesBentoDownloaderDefault),
		Kaniko:             getEnv(consts.EnvInternalImagesKaniko, consts.InternalImagesKanikoDefault),
		MetricsTransformer: getEnv(consts.EnvInternalImagesMetricsTransformer, consts.InternalImagesMetricsTransformerDefault),
		Buildkit:           getEnv(consts.EnvInternalImagesBuildkit, consts.InternalImagesBuildkitDefault),
		BuildkitRootless:   getEnv(consts.EnvInternalImagesBuildkitRootless, consts.InternalImagesBuildkitRootlessDefault),
	}
}
package consts
// Shared constants of the operator: HTTP header names, default identities,
// service ports, component names, default helper images, environment variable
// names, and Kubernetes label/annotation/resource/secret names.
const (
HPACPUDefaultAverageUtilization = 80
// HTTP header names; the yataiclient package declares constants with the same names and values.
// nolint: gosec
YataiApiTokenHeaderName = "X-YATAI-API-TOKEN"
NgcOrganizationHeaderName = "Nv-Ngc-Org"
NgcUserHeaderName = "Nv-Actor-Id"
DefaultUserId = "default"
DefaultOrgId = "default"
// The service port name and the container port name are both "http".
BentoServicePort = 3000
BentoServicePortName = "http"
BentoContainerPortName = "http"
YataiImageBuilderComponentName = "yatai-image-builder"
YataiDeploymentComponentName = "yatai-deployment"
YataiBentoDeploymentComponentApiServer = "api-server"
// Fallback image references used when the matching EnvInternalImages* variable is not set.
InternalImagesBentoDownloaderDefault = "quay.io/bentoml/bento-downloader:0.0.3"
InternalImagesKanikoDefault = "quay.io/bentoml/kaniko:1.9.1"
InternalImagesMetricsTransformerDefault = "quay.io/bentoml/yatai-bento-metrics-transformer:0.0.3"
InternalImagesBuildkitDefault = "quay.io/bentoml/buildkit:master"
InternalImagesBuildkitRootlessDefault = "quay.io/bentoml/buildkit:master-rootless"
// Env* constants are environment variable names, not values.
EnvYataiEndpoint = "YATAI_ENDPOINT"
EnvYataiClusterName = "YATAI_CLUSTER_NAME"
// nolint: gosec
EnvYataiApiToken = "YATAI_API_TOKEN"
EnvBentoServicePort = "PORT"
// tracking envars
EnvYataiDeploymentUID = "YATAI_T_DEPLOYMENT_UID"
EnvYataiBentoDeploymentName = "YATAI_BENTO_DEPLOYMENT_NAME"
EnvYataiBentoDeploymentNamespace = "YATAI_BENTO_DEPLOYMENT_NAMESPACE"
EnvDockerRegistryServer = "DOCKER_REGISTRY_SERVER"
EnvDockerRegistryInClusterServer = "DOCKER_REGISTRY_IN_CLUSTER_SERVER"
EnvDockerRegistryUsername = "DOCKER_REGISTRY_USERNAME"
// nolint:gosec
EnvDockerRegistryPassword = "DOCKER_REGISTRY_PASSWORD"
EnvDockerRegistrySecure = "DOCKER_REGISTRY_SECURE"
EnvDockerRegistryBentoRepositoryName = "DOCKER_REGISTRY_BENTO_REPOSITORY_NAME"
EnvDockerRegistryModelRepositoryName = "DOCKER_REGISTRY_MODEL_REPOSITORY_NAME"
EnvInternalImagesBentoDownloader = "INTERNAL_IMAGES_BENTO_DOWNLOADER"
EnvInternalImagesKaniko = "INTERNAL_IMAGES_KANIKO"
EnvInternalImagesMetricsTransformer = "INTERNAL_IMAGES_METRICS_TRANSFORMER"
EnvInternalImagesBuildkit = "INTERNAL_IMAGES_BUILDKIT"
EnvInternalImagesBuildkitRootless = "INTERNAL_IMAGES_BUILDKIT_ROOTLESS"
EnvYataiSystemNamespace = "YATAI_SYSTEM_NAMESPACE"
EnvYataiImageBuilderNamespace = "YATAI_IMAGE_BUILDER_NAMESPACE"
EnvYataiDeploymentNamespace = "YATAI_DEPLOYMENT_NAMESPACE"
EnvBentoDeploymentNamespaces = "BENTO_DEPLOYMENT_NAMESPACES"
EnvImageBuildersNamespace = "IMAGE_BUILDERS_NAMESPACE"
// Label keys and values under the "yatai.ai/" prefix.
// NOTE(review): KubeLabelYataiBentoRepository and KubeLabelBentoRepository
// share the value "yatai.ai/bento-repository" — confirm both are needed.
KubeLabelYataiSelector = "yatai.ai/selector"
KubeLabelYataiBentoRepository = "yatai.ai/bento-repository"
KubeLabelYataiBento = "yatai.ai/bento"
KubeLabelYataiModelRepository = "yatai.ai/model-repository"
KubeLabelYataiModel = "yatai.ai/model"
KubeLabelYataiBentoDeployment = "yatai.ai/bento-deployment"
KubeLabelYataiBentoDeploymentComponentType = "yatai.ai/bento-deployment-component-type"
KubeLabelYataiBentoDeploymentTargetType = "yatai.ai/bento-deployment-target-type"
KubeLabelBentoRepository = "yatai.ai/bento-repository"
KubeLabelBentoVersion = "yatai.ai/bento-version"
KubeLabelCreator = "yatai.ai/creator"
KubeLabelIsBentoImageBuilder = "yatai.ai/is-bento-image-builder"
KubeLabelIsModelSeeder = "yatai.ai/is-model-seeder"
KubeLabelBentoRequest = "yatai.ai/bento-request"
KubeLabelValueFalse = "false"
KubeLabelValueTrue = "true"
KubeLabelYataiImageBuilderPod = "yatai.ai/yatai-image-builder-pod"
KubeLabelBentoDeploymentPod = "yatai.ai/bento-deployment-pod"
// Annotation keys; the repository and version keys reuse the strings of the
// corresponding label keys above.
KubeAnnotationBentoRepository = "yatai.ai/bento-repository"
KubeAnnotationBentoVersion = "yatai.ai/bento-version"
KubeAnnotationDockerRegistryInsecure = "yatai.ai/docker-registry-insecure"
KubeAnnotationYataiImageBuilderSeparateModels = "yatai.ai/yatai-image-builder-separate-models"
KubeAnnotationIsMultiTenancy = "yatai.ai/is-multi-tenancy"
KubeResourceGPUNvidia = "nvidia.com/gpu"
// nolint: gosec
KubeSecretNameRegcred = "yatai-regcred"
)
...@@ -22,8 +22,6 @@ import ( ...@@ -22,8 +22,6 @@ import (
"strings" "strings"
"dario.cat/mergo" "dario.cat/mergo"
"emperror.dev/errors"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
...@@ -107,7 +105,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -107,7 +105,7 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
}() }()
// fetch the DynamoNIMConfig // fetch the DynamoNIMConfig
dynamoNIMConfig, err := nim.GetDynamoNIMConfig(ctx, dynamoDeployment, r.getSecret, r.Recorder) dynamoNIMConfig, err := nim.GetDynamoNIMConfig(ctx, dynamoDeployment, r.Recorder)
if err != nil { if err != nil {
reason = "failed_to_get_the_DynamoNIMConfig" reason = "failed_to_get_the_DynamoNIMConfig"
return ctrl.Result{}, err return ctrl.Result{}, err
...@@ -179,12 +177,6 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -179,12 +177,6 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
} }
func (r *DynamoDeploymentReconciler) getSecret(ctx context.Context, namespace, name string) (*corev1.Secret, error) {
secret := &corev1.Secret{}
err := r.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, secret)
return secret, errors.Wrap(err, "get secret")
}
// SetupWithManager sets up the controller with the Manager. // SetupWithManager sets up the controller with the Manager.
func (r *DynamoDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *DynamoDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr). return ctrl.NewControllerManagedBy(mgr).
......
...@@ -27,12 +27,11 @@ import ( ...@@ -27,12 +27,11 @@ import (
"emperror.dev/errors" "emperror.dev/errors"
compounaiCommon "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common" compounaiCommon "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/modelschemas" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemasv1"
yataiclient "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/yatai-client" yataiclient "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/yatai-client"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/v1alpha1"
commonconfig "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/pkg/dynamo/config" commonconfig "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/config"
commonconsts "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/pkg/dynamo/consts" commonconsts "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/internal/consts"
"github.com/huandu/xstrings" "github.com/huandu/xstrings"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors" k8serrors "k8s.io/apimachinery/pkg/api/errors"
...@@ -79,17 +78,17 @@ type ServiceConfig struct { ...@@ -79,17 +78,17 @@ type ServiceConfig struct {
Config Config `yaml:"config"` Config Config `yaml:"config"`
} }
func RetrieveDynamoNimDownloadURL(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, secretGetter SecretGetter, recorder EventRecorder) (*string, *string, error) { func RetrieveDynamoNimDownloadURL(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, recorder EventRecorder) (*string, *string, error) {
dynamoNimDownloadURL := "" dynamoNimDownloadURL := ""
dynamoNimApiToken := "" dynamoNimApiToken := ""
var dynamoNim *schemasv1.BentoFullSchema var dynamoNim *schemas.DynamoNIM
dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoDeployment.Spec.DynamoNim, ":") dynamoNimRepositoryName, _, dynamoNimVersion := xstrings.Partition(dynamoDeployment.Spec.DynamoNim, ":")
var err error var err error
var yataiClient_ **yataiclient.YataiClient var yataiClient_ **yataiclient.YataiClient
var yataiConf_ **commonconfig.YataiConfig var yataiConf_ **commonconfig.YataiConfig
yataiClient_, yataiConf_, err = GetYataiClient(ctx, secretGetter) yataiClient_, yataiConf_, err = GetYataiClient(ctx)
if err != nil { if err != nil {
err = errors.Wrap(err, "get yatai client") err = errors.Wrap(err, "get yatai client")
return nil, nil, err return nil, nil, err
...@@ -111,8 +110,8 @@ func RetrieveDynamoNimDownloadURL(ctx context.Context, dynamoDeployment *v1alpha ...@@ -111,8 +110,8 @@ func RetrieveDynamoNimDownloadURL(ctx context.Context, dynamoDeployment *v1alpha
} }
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim) recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Got dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim)
if dynamoNim.TransmissionStrategy != nil && *dynamoNim.TransmissionStrategy == modelschemas.TransmissionStrategyPresignedURL { if dynamoNim.TransmissionStrategy != nil && *dynamoNim.TransmissionStrategy == schemas.TransmissionStrategyPresignedURL {
var dynamoNim_ *schemasv1.BentoSchema var dynamoNim_ *schemas.DynamoNIM
recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim) recorder.Eventf(dynamoDeployment, corev1.EventTypeNormal, "GenerateImageBuilderPod", "Getting presigned url for dynamoNim %s from yatai service", dynamoDeployment.Spec.DynamoNim)
dynamoNim_, err = yataiClient.PresignBentoDownloadURL(ctx, dynamoNimRepositoryName, dynamoNimVersion) dynamoNim_, err = yataiClient.PresignBentoDownloadURL(ctx, dynamoNimRepositoryName, dynamoNimVersion)
if err != nil { if err != nil {
...@@ -175,35 +174,8 @@ func RetrieveDynamoNIMConfigurationFile(ctx context.Context, url string, yataiAp ...@@ -175,35 +174,8 @@ func RetrieveDynamoNIMConfigurationFile(ctx context.Context, url string, yataiAp
return yamlContent, nil return yamlContent, nil
} }
func GetYataiClientWithAuth(ctx context.Context, dynamoNimRequest *v1alpha1.DynamoNimRequest, secretGetter SecretGetter) (**yataiclient.YataiClient, **commonconfig.YataiConfig, error) { func GetYataiClient(ctx context.Context) (yataiClient **yataiclient.YataiClient, yataiConf **commonconfig.YataiConfig, err error) {
orgId, ok := dynamoNimRequest.Labels[commonconsts.NgcOrganizationHeaderName] yataiConf_, err := commonconfig.GetYataiConfig(ctx)
if !ok {
orgId = commonconsts.DefaultOrgId
}
userId, ok := dynamoNimRequest.Labels[commonconsts.NgcUserHeaderName]
if !ok {
userId = commonconsts.DefaultUserId
}
auth := yataiclient.DynamoAuthHeaders{
OrgId: orgId,
UserId: userId,
}
client, yataiConf, err := GetYataiClient(ctx, secretGetter)
if err != nil {
return nil, nil, err
}
(*client).SetAuth(auth)
return client, yataiConf, err
}
// SecretGetter is a function type that, given a context, a namespace and a
// name, returns a *corev1.Secret or an error.
type SecretGetter func(ctx context.Context, namespace, name string) (*corev1.Secret, error)
func GetYataiClient(ctx context.Context, secretGetter SecretGetter) (yataiClient **yataiclient.YataiClient, yataiConf **commonconfig.YataiConfig, err error) {
yataiConf_, err := commonconfig.GetYataiConfig(ctx, secretGetter, commonconsts.YataiImageBuilderComponentName, false)
isNotFound := k8serrors.IsNotFound(err) isNotFound := k8serrors.IsNotFound(err)
if err != nil && !isNotFound { if err != nil && !isNotFound {
err = errors.Wrap(err, "get yatai config") err = errors.Wrap(err, "get yatai config")
...@@ -237,8 +209,8 @@ func ParseDynamoNIMConfig(ctx context.Context, yamlContent *bytes.Buffer) (*Dyna ...@@ -237,8 +209,8 @@ func ParseDynamoNIMConfig(ctx context.Context, yamlContent *bytes.Buffer) (*Dyna
return &config, err return &config, err
} }
func GetDynamoNIMConfig(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, secretGetter SecretGetter, recorder EventRecorder) (*DynamoNIMConfig, error) { func GetDynamoNIMConfig(ctx context.Context, dynamoDeployment *v1alpha1.DynamoDeployment, recorder EventRecorder) (*DynamoNIMConfig, error) {
dynamoNimDownloadURL, dynamoNimApiToken, err := RetrieveDynamoNimDownloadURL(ctx, dynamoDeployment, secretGetter, recorder) dynamoNimDownloadURL, dynamoNimApiToken, err := RetrieveDynamoNimDownloadURL(ctx, dynamoDeployment, recorder)
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment