Commit 602352ce authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

chore: rename dynamo (#44)


Co-authored-by: Biswa Panda <biswa.panda@gmail.com>
parent ecf53ce2
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package schemasv1
import "github.com/dynemo-ai/dynemo/deploy/compoundai/operator/api/compoundai/modelschemas"
// UserSchema is the JSON shape of a user. It embeds ResourceSchema (declared
// elsewhere in this package) and adds the user's profile fields.
type UserSchema struct {
ResourceSchema
FirstName string `json:"first_name"`
LastName string `json:"last_name"`
Email string `json:"email"`
AvatarUrl string `json:"avatar_url"`
// IsSuperAdmin is serialized as "is_super_admin".
IsSuperAdmin bool `json:"is_super_admin"`
}
// UserListSchema is a list of users. It embeds BaseListSchema (declared
// elsewhere in this package) and carries the users under the "items" key.
type UserListSchema struct {
BaseListSchema
Items []*UserSchema `json:"items"`
}
// RegisterUserSchema is the payload for registering a user. Name, Email and
// Password are tagged validate:"required"; FirstName and LastName are not.
type RegisterUserSchema struct {
Name string `json:"name" validate:"required"`
FirstName string `json:"first_name"`
LastName string `json:"last_name"`
Email string `json:"email" validate:"required"`
Password string `json:"password" validate:"required"`
}
// LoginUserSchema is the payload for logging in. Both fields are tagged
// validate:"required".
type LoginUserSchema struct {
// NameOrEmail is serialized as "name_or_email".
NameOrEmail string `json:"name_or_email" validate:"required"`
Password string `json:"password" validate:"required"`
}
// UpdateUserSchema is the payload for updating a user's first and last name.
// Both fields are tagged validate:"required".
type UpdateUserSchema struct {
FirstName string `json:"first_name" validate:"required"`
LastName string `json:"last_name" validate:"required"`
}
// ResetPasswordSchema is the payload carrying a current and a new password.
//
// NOTE(review): unlike the other request schemas in this file, neither field
// carries a validate:"required" tag — confirm this is intentional.
type ResetPasswordSchema struct {
CurrentPassword string `json:"current_password"`
NewPassword string `json:"new_password"`
}
// CreateUserSchema is the payload for creating a user with an explicit role.
// Name, Email and Password are tagged validate:"required".
type CreateUserSchema struct {
Name string `json:"name" validate:"required"`
Email string `json:"email" validate:"required"`
Password string `json:"password" validate:"required"`
// Role's enum tag lists guest, developer and admin.
Role modelschemas.MemberRole `json:"role" enum:"guest,developer,admin"`
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package schemasv1
// VersionSchema is the JSON shape of build/version information: a version
// string, a git commit and a build date, all carried as plain strings.
type VersionSchema struct {
Version string `json:"version"`
GitCommit string `json:"git_commit"`
BuildDate string `json:"build_date"`
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package schemasv1
// WsReqType names the kind of a WsReqSchema message (presumably a websocket
// request, per the Ws prefix).
type WsReqType string
// Known WsReqType values.
const (
WsReqTypeData WsReqType = "data"
WsReqTypeHeartbeat WsReqType = "heartbeat"
)
// WsReqSchema is a request envelope: a message type plus an arbitrary payload.
type WsReqSchema struct {
Type WsReqType `json:"type"`
// Payload is untyped; its shape is not constrained by this schema.
Payload interface{} `json:"payload"`
}
// WsRespType names the outcome carried by a WsRespSchema message.
type WsRespType string
// Known WsRespType values.
const (
WsRespTypeSuccess WsRespType = "success"
WsRespTypeError WsRespType = "error"
)
// WsRespSchema is a response envelope: an outcome type, a message string and
// an arbitrary payload.
type WsRespSchema struct {
Type WsRespType `json:"type"`
Message string `json:"message"`
// Payload is untyped; its shape is not constrained by this schema.
Payload interface{} `json:"payload"`
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package schemasv1
import (
"time"
"github.com/dynemo-ai/dynemo/deploy/compoundai/operator/api/compoundai/modelschemas"
)
// YataiComponentSchema is the JSON shape of a Yatai component. It embeds
// ResourceSchema (declared elsewhere in this package) and references the
// creating user, the owning cluster and the component manifest.
type YataiComponentSchema struct {
ResourceSchema
Creator *UserSchema `json:"creator"`
Cluster *ClusterFullSchema `json:"cluster"`
Description string `json:"description"`
Version string `json:"version"`
KubeNamespace string `json:"kube_namespace"`
Manifest *modelschemas.YataiComponentManifestSchema `json:"manifest"`
// The timestamps are pointers, so they may be nil.
LatestInstalledAt *time.Time `json:"latest_installed_at"`
LatestHeartbeatAt *time.Time `json:"latest_heartbeat_at"`
}
// RegisterYataiComponentSchema is the payload for registering a Yatai
// component: its name, version, Kubernetes namespace, optional selector labels
// and manifest.
type RegisterYataiComponentSchema struct {
Name modelschemas.YataiComponentName `json:"name"`
Version string `json:"version"`
KubeNamespace string `json:"kube_namespace"`
// SelectorLabels is omitted from the JSON output when empty.
SelectorLabels map[string]string `json:"selector_labels,omitempty"`
Manifest *modelschemas.YataiComponentManifestSchema `json:"manifest"`
}
All of these packages originate from https://github.com/bentoml/yatai-common.
\ No newline at end of file
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package config
import (
"context"
"os"
"strings"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"github.com/dynemo-ai/dynemo/deploy/compoundai/operator/pkg/compoundai/consts"
)
func GetYataiSystemNamespaceFromEnv() string {
return getEnv(consts.EnvYataiSystemNamespace, consts.DefaultKubeNamespaceYataiSystem)
}
// GetYataiImageBuilderNamespace resolves the namespace of the image-builder
// component.
//
// Resolution order:
//  1. the environment variable consts.EnvYataiImageBuilderNamespace, if non-empty;
//  2. the same key in the image-builder shared-env secret, fetched from the
//     system namespace through secretGetter, if non-empty;
//  3. consts.DefaultKubeNamespaceYataiImageBuilderComponent.
//
// An error from secretGetter is returned to the caller; a not-found error is
// first wrapped with the secret name and namespace.
func GetYataiImageBuilderNamespace(ctx context.Context, secretGetter func(ctx context.Context, namespace, name string) (*corev1.Secret, error)) (namespace string, err error) {
	if fromEnv := os.Getenv(consts.EnvYataiImageBuilderNamespace); fromEnv != "" {
		return fromEnv, nil
	}

	systemNamespace := GetYataiSystemNamespaceFromEnv()
	secretName := consts.KubeSecretNameYataiImageBuilderSharedEnv
	secret, getErr := secretGetter(ctx, systemNamespace, secretName)
	if getErr != nil {
		if k8serrors.IsNotFound(getErr) {
			return "", errors.Wrapf(getErr, "secret %s not found in namespace %s", secretName, systemNamespace)
		}
		return "", getErr
	}

	if fromSecret := string(secret.Data[consts.EnvYataiImageBuilderNamespace]); fromSecret != "" {
		return fromSecret, nil
	}
	return consts.DefaultKubeNamespaceYataiImageBuilderComponent, nil
}
// GetYataiDeploymentNamespace resolves the namespace of the deployment
// component.
//
// Resolution order:
//  1. the environment variable consts.EnvYataiDeploymentNamespace, if non-empty;
//  2. the same key in the deployment shared-env secret, fetched from the system
//     namespace through secretGetter, if non-empty;
//  3. consts.DefaultKubeNamespaceYataiDeploymentComponent.
//
// An error from secretGetter is returned to the caller; a not-found error is
// first wrapped with the secret name and namespace.
func GetYataiDeploymentNamespace(ctx context.Context, secretGetter func(ctx context.Context, namespace, name string) (*corev1.Secret, error)) (namespace string, err error) {
	if fromEnv := os.Getenv(consts.EnvYataiDeploymentNamespace); fromEnv != "" {
		return fromEnv, nil
	}

	systemNamespace := GetYataiSystemNamespaceFromEnv()
	secretName := consts.KubeSecretNameYataiDeploymentSharedEnv
	secret, getErr := secretGetter(ctx, systemNamespace, secretName)
	if getErr != nil {
		if k8serrors.IsNotFound(getErr) {
			return "", errors.Wrapf(getErr, "secret %s not found in namespace %s", secretName, systemNamespace)
		}
		return "", getErr
	}

	if fromSecret := string(secret.Data[consts.EnvYataiDeploymentNamespace]); fromSecret != "" {
		return fromSecret, nil
	}
	return consts.DefaultKubeNamespaceYataiDeploymentComponent, nil
}
// GetImageBuildersNamespace resolves the namespace used for image builders.
//
// Resolution order:
//  1. the environment variable consts.EnvImageBuildersNamespace, if non-empty;
//  2. the same key in the image-builder shared-env secret, read from the system
//     namespace with the supplied clientset, if non-empty;
//  3. consts.DefaultKubeNamespaceImageBuilders.
//
// An error from the secret lookup is returned to the caller; a not-found error
// is first wrapped with the secret name and namespace.
func GetImageBuildersNamespace(ctx context.Context, cliset *kubernetes.Clientset) (namespace string, err error) {
	if fromEnv := os.Getenv(consts.EnvImageBuildersNamespace); fromEnv != "" {
		return fromEnv, nil
	}

	systemNamespace := GetYataiSystemNamespaceFromEnv()
	secretName := consts.KubeSecretNameYataiImageBuilderSharedEnv
	secret, getErr := cliset.CoreV1().Secrets(systemNamespace).Get(ctx, secretName, metav1.GetOptions{})
	if getErr != nil {
		if k8serrors.IsNotFound(getErr) {
			return "", errors.Wrapf(getErr, "secret %s not found in namespace %s", secretName, systemNamespace)
		}
		return "", getErr
	}

	if fromSecret := string(secret.Data[consts.EnvImageBuildersNamespace]); fromSecret != "" {
		return fromSecret, nil
	}
	return consts.DefaultKubeNamespaceImageBuilders, nil
}
// GetBentoDeploymentNamespaces resolves the list of bento deployment
// namespaces.
//
// The list is a comma-separated string taken from, in order:
//  1. the environment variable consts.EnvBentoDeploymentNamespaces, if non-empty;
//  2. the same key in the deployment shared-env secret, fetched from the system
//     namespace through secretGetter, if non-empty.
//
// When neither source yields a value, the result is a single-element list
// holding consts.DefaultKubeNamespaceBentoDeployment. Entries are split on ","
// only; surrounding whitespace is not trimmed.
//
// An error from secretGetter is returned to the caller; a not-found error is
// first wrapped with the secret name and namespace.
func GetBentoDeploymentNamespaces(ctx context.Context, secretGetter func(ctx context.Context, namespace, name string) (*corev1.Secret, error)) (namespaces []string, err error) {
	if fromEnv := os.Getenv(consts.EnvBentoDeploymentNamespaces); fromEnv != "" {
		return strings.Split(fromEnv, ","), nil
	}

	systemNamespace := GetYataiSystemNamespaceFromEnv()
	secretName := consts.KubeSecretNameYataiDeploymentSharedEnv
	secret, getErr := secretGetter(ctx, systemNamespace, secretName)
	if getErr != nil {
		if k8serrors.IsNotFound(getErr) {
			return nil, errors.Wrapf(getErr, "secret %s not found in namespace %s", secretName, systemNamespace)
		}
		return nil, getErr
	}

	fromSecret := string(secret.Data[consts.EnvBentoDeploymentNamespaces])
	if fromSecret != "" {
		return strings.Split(fromSecret, ","), nil
	}
	return []string{consts.DefaultKubeNamespaceBentoDeployment}, nil
}
// DockerRegistryConfig holds the docker registry settings produced by
// GetDockerRegistryConfig. Every field also carries a yaml tag.
type DockerRegistryConfig struct {
BentoRepositoryName string `yaml:"bento_repository_name"`
ModelRepositoryName string `yaml:"model_repository_name"`
// Server is the field GetDockerRegistryConfig treats as mandatory.
Server string `yaml:"server"`
InClusterServer string `yaml:"in_cluster_server"`
Username string `yaml:"username"`
Password string `yaml:"password"`
// Secure is set by GetDockerRegistryConfig only when the source value is exactly "true".
Secure bool `yaml:"secure"`
}
// GetDockerRegistryConfig assembles the docker registry configuration.
//
// All fields are first read from environment variables. If the server is empty
// after that, every field is replaced by the value stored under the same key in
// the image-builder shared-env secret, fetched from the system namespace
// through secretGetter. Secure is true only when the source value is exactly
// "true".
//
// The returned conf is never nil. An error is returned when the secret lookup
// fails (a not-found error is wrapped with the secret name and namespace) or
// when the server is still empty after both sources were consulted.
func GetDockerRegistryConfig(ctx context.Context, secretGetter func(ctx context.Context, namespace, name string) (*corev1.Secret, error)) (conf *DockerRegistryConfig, err error) {
	conf = &DockerRegistryConfig{
		BentoRepositoryName: os.Getenv(consts.EnvDockerRegistryBentoRepositoryName),
		ModelRepositoryName: os.Getenv(consts.EnvDockerRegistryModelRepositoryName),
		Server:              os.Getenv(consts.EnvDockerRegistryServer),
		InClusterServer:     os.Getenv(consts.EnvDockerRegistryInClusterServer),
		Username:            os.Getenv(consts.EnvDockerRegistryUsername),
		Password:            os.Getenv(consts.EnvDockerRegistryPassword),
		Secure:              os.Getenv(consts.EnvDockerRegistrySecure) == "true",
	}

	if conf.Server == "" {
		systemNamespace := GetYataiSystemNamespaceFromEnv()
		secretName := consts.KubeSecretNameYataiImageBuilderSharedEnv
		secret, getErr := secretGetter(ctx, systemNamespace, secretName)
		if getErr != nil {
			if k8serrors.IsNotFound(getErr) {
				getErr = errors.Wrapf(getErr, "secret %s not found in namespace %s", secretName, systemNamespace)
			}
			return conf, getErr
		}

		// The secret overrides every field, not just the missing server.
		fromSecret := func(key string) string { return string(secret.Data[key]) }
		conf.BentoRepositoryName = fromSecret(consts.EnvDockerRegistryBentoRepositoryName)
		conf.ModelRepositoryName = fromSecret(consts.EnvDockerRegistryModelRepositoryName)
		conf.Server = fromSecret(consts.EnvDockerRegistryServer)
		conf.InClusterServer = fromSecret(consts.EnvDockerRegistryInClusterServer)
		conf.Username = fromSecret(consts.EnvDockerRegistryUsername)
		conf.Password = fromSecret(consts.EnvDockerRegistryPassword)
		conf.Secure = fromSecret(consts.EnvDockerRegistrySecure) == "true"
	}

	if conf.Server == "" {
		return conf, errors.Wrapf(errors.New("not found"), "the environment variable %s is not set", consts.EnvDockerRegistryServer)
	}
	return conf, nil
}
// YataiConfig holds the endpoint, cluster name and API token produced by
// GetYataiConfig. Every field also carries a yaml tag.
type YataiConfig struct {
Endpoint string `yaml:"endpoint"`
ClusterName string `yaml:"cluster_name"`
ApiToken string `yaml:"api_token"`
}
// GetYataiConfig assembles the Yatai endpoint, cluster name and API token.
//
// Unless ignoreEnv is set, all three values are first read from environment
// variables. If the endpoint is still empty, the endpoint and cluster name are
// taken from the common-env secret in the system namespace. If the API token is
// still empty, it is taken from the component's own env secret, located in the
// namespace resolved for that component.
//
// yataiComponentName must be consts.YataiImageBuilderComponentName or
// consts.YataiDeploymentComponentName whenever the API token has to be looked
// up; any other value yields an error in that case.
//
// The returned conf is never nil, including when an error is returned.
func GetYataiConfig(ctx context.Context, secretGetter func(ctx context.Context, namespace, name string) (*corev1.Secret, error), yataiComponentName string, ignoreEnv bool) (conf *YataiConfig, err error) {
	conf = &YataiConfig{}
	if !ignoreEnv {
		conf.Endpoint = os.Getenv(consts.EnvYataiEndpoint)
		conf.ClusterName = os.Getenv(consts.EnvYataiClusterName)
		conf.ApiToken = os.Getenv(consts.EnvYataiApiToken)
	}

	systemNamespace := GetYataiSystemNamespaceFromEnv()

	if conf.Endpoint == "" {
		commonEnv, getErr := secretGetter(ctx, systemNamespace, consts.KubeSecretNameYataiCommonEnv)
		if getErr != nil {
			if k8serrors.IsNotFound(getErr) {
				getErr = errors.Wrapf(getErr, "secret %s not found in namespace %s", consts.KubeSecretNameYataiCommonEnv, systemNamespace)
			}
			return conf, getErr
		}
		conf.Endpoint = string(commonEnv.Data[consts.EnvYataiEndpoint])
		conf.ClusterName = string(commonEnv.Data[consts.EnvYataiClusterName])
	}

	if conf.ApiToken != "" {
		return conf, nil
	}

	// The token lives in a per-component secret; pick the secret and resolve
	// the namespace it is stored in.
	var (
		secretName      string
		secretNamespace string
		nsErr           error
	)
	switch yataiComponentName {
	case consts.YataiImageBuilderComponentName:
		secretName = consts.KubeSecretNameYataiImageBuilderEnv
		secretNamespace, nsErr = GetYataiImageBuilderNamespace(ctx, secretGetter)
	case consts.YataiDeploymentComponentName:
		secretName = consts.KubeSecretNameYataiDeploymentEnv
		secretNamespace, nsErr = GetYataiDeploymentNamespace(ctx, secretGetter)
	default:
		return conf, errors.Errorf("invalid yatai component name %s", yataiComponentName)
	}
	if nsErr != nil {
		return conf, errors.Wrapf(nsErr, "failed to get namespace for %s", yataiComponentName)
	}

	componentEnv, getErr := secretGetter(ctx, secretNamespace, secretName)
	if getErr != nil {
		if k8serrors.IsNotFound(getErr) {
			return conf, errors.Errorf("the secret %s in namespace %s does not exist", secretName, secretNamespace)
		}
		return conf, errors.Wrapf(getErr, "failed to get secret %s in namespace %s", secretName, secretNamespace)
	}
	conf.ApiToken = string(componentEnv.Data[consts.EnvYataiApiToken])
	return conf, nil
}
// getEnv returns the value of the environment variable key when it is present
// in the environment (an empty value counts as present), and fallback when the
// variable is not set at all.
func getEnv(key, fallback string) string {
	value, present := os.LookupEnv(key)
	if !present {
		return fallback
	}
	return value
}
// InternalImages holds one image reference per internal helper image.
// GetInternalImages fills each field from an INTERNAL_IMAGES_* environment
// variable, falling back to the matching consts.InternalImages*Default value.
type InternalImages struct {
BentoDownloader string
Curl string
Kaniko string
MetricsTransformer string
Buildkit string
BuildkitRootless string
Buildah string
}
// GetInternalImages returns the internal image references. Each field comes
// from its environment variable when that variable is set, and from the
// corresponding default constant otherwise.
func GetInternalImages() (conf *InternalImages) {
	return &InternalImages{
		BentoDownloader:    getEnv(consts.EnvInternalImagesBentoDownloader, consts.InternalImagesBentoDownloaderDefault),
		Curl:               getEnv(consts.EnvInternalImagesCurl, consts.InternalImagesCurlDefault),
		Kaniko:             getEnv(consts.EnvInternalImagesKaniko, consts.InternalImagesKanikoDefault),
		MetricsTransformer: getEnv(consts.EnvInternalImagesMetricsTransformer, consts.InternalImagesMetricsTransformerDefault),
		Buildkit:           getEnv(consts.EnvInternalImagesBuildkit, consts.InternalImagesBuildkitDefault),
		BuildkitRootless:   getEnv(consts.EnvInternalImagesBuildkitRootless, consts.InternalImagesBuildkitRootlessDefault),
		Buildah:            getEnv(consts.EnvInternalImagesBuildah, consts.InternalImagesBuildahDefault),
	}
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package consts
// Shared defaults, HTTP header names, component names and default image
// references.
const (
// etcd defaults; per the identifier names the values are in seconds.
DefaultETCDTimeoutSeconds = 5
DefaultETCDDialKeepaliveTimeSeconds = 30
DefaultETCDDialKeepaliveTimeoutSeconds = 10
// HPA defaults.
HPADefaultMaxReplicas = 10
HPACPUDefaultAverageUtilization = 80
YataiDebugImg = "yatai.ai/yatai-infras/debug"
YataiKubectlNamespace = "default"
YataiKubectlContainerName = "main"
YataiKubectlImage = "yatai.ai/yatai-infras/k8s"
TracingContextKey = "tracing-context"
// HTTP header names.
// nolint: gosec
YataiApiTokenHeaderName = "X-YATAI-API-TOKEN"
YataiOrganizationHeaderName = "X-Yatai-Organization"
NgcOrganizationHeaderName = "Nv-Ngc-Org"
NgcUserHeaderName = "Nv-Actor-Id"
DefaultUserId = "default"
DefaultOrgId = "default"
// Bento service/container port numbers and port names.
BentoServicePort = 3000
BentoContainerDefaultPort = 3000
BentoServicePortName = "http"
BentoContainerPortName = "http"
NoneStr = "None"
AmazonS3Endpoint = "s3.amazonaws.com"
// Component names accepted by config.GetYataiConfig.
YataiImageBuilderComponentName = "yatai-image-builder"
YataiDeploymentComponentName = "yatai-deployment"
// nolint: gosec
YataiK8sBotApiTokenName = "yatai-k8s-bot"
YataiBentoDeploymentComponentApiServer = "api-server"
YataiBentoDeploymentComponentRunner = "runner"
// Fallback image references used by config.GetInternalImages when the
// matching INTERNAL_IMAGES_* environment variable is not set.
InternalImagesBentoDownloaderDefault = "quay.io/bentoml/bento-downloader:0.0.3"
InternalImagesCurlDefault = "quay.io/bentoml/curl:0.0.1"
InternalImagesKanikoDefault = "quay.io/bentoml/kaniko:1.9.1"
InternalImagesMetricsTransformerDefault = "quay.io/bentoml/yatai-bento-metrics-transformer:0.0.3"
InternalImagesBuildkitDefault = "quay.io/bentoml/buildkit:master"
InternalImagesBuildkitRootlessDefault = "quay.io/bentoml/buildkit:master-rootless"
InternalImagesBuildahDefault = "quay.io/bentoml/bentoml-buildah:0.0.1"
)
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package consts
// Names of environment variables. Package config also uses several of these
// names as keys into Kubernetes Secret data when the variable itself is unset.
const (
EnvYataiEndpoint = "YATAI_ENDPOINT"
EnvYataiClusterName = "YATAI_CLUSTER_NAME"
// nolint: gosec
EnvYataiApiToken = "YATAI_API_TOKEN"
EnvBentoServicePort = "PORT"
// tracking envars
EnvYataiVersion = "YATAI_T_VERSION"
EnvYataiOrgUID = "YATAI_T_ORG_UID"
EnvYataiDeploymentUID = "YATAI_T_DEPLOYMENT_UID"
EnvYataiClusterUID = "YATAI_T_CLUSTER_UID"
EnvYataiBentoDeploymentName = "YATAI_BENTO_DEPLOYMENT_NAME"
EnvYataiBentoDeploymentNamespace = "YATAI_BENTO_DEPLOYMENT_NAMESPACE"
// S3 settings.
EnvS3Endpoint = "S3_ENDPOINT"
EnvS3Region = "S3_REGION"
EnvS3BucketName = "S3_BUCKET_NAME"
EnvS3AccessKey = "S3_ACCESS_KEY"
// nolint:gosec
EnvS3SecretKey = "S3_SECRET_KEY"
EnvS3Secure = "S3_SECURE"
// Docker registry settings, read by config.GetDockerRegistryConfig.
EnvDockerRegistryServer = "DOCKER_REGISTRY_SERVER"
EnvDockerRegistryInClusterServer = "DOCKER_REGISTRY_IN_CLUSTER_SERVER"
EnvDockerRegistryUsername = "DOCKER_REGISTRY_USERNAME"
// nolint:gosec
EnvDockerRegistryPassword = "DOCKER_REGISTRY_PASSWORD"
EnvDockerRegistrySecure = "DOCKER_REGISTRY_SECURE"
EnvDockerRegistryBentoRepositoryName = "DOCKER_REGISTRY_BENTO_REPOSITORY_NAME"
EnvDockerRegistryModelRepositoryName = "DOCKER_REGISTRY_MODEL_REPOSITORY_NAME"
// Internal image overrides, read by config.GetInternalImages.
EnvInternalImagesBentoDownloader = "INTERNAL_IMAGES_BENTO_DOWNLOADER"
EnvInternalImagesCurl = "INTERNAL_IMAGES_CURL"
EnvInternalImagesKaniko = "INTERNAL_IMAGES_KANIKO"
EnvInternalImagesMetricsTransformer = "INTERNAL_IMAGES_METRICS_TRANSFORMER"
EnvInternalImagesBuildkit = "INTERNAL_IMAGES_BUILDKIT"
EnvInternalImagesBuildkitRootless = "INTERNAL_IMAGES_BUILDKIT_ROOTLESS"
EnvInternalImagesBuildah = "INTERNAL_IMAGES_BUILDAH"
// Namespace overrides, read by the namespace getters in package config.
EnvYataiSystemNamespace = "YATAI_SYSTEM_NAMESPACE"
EnvYataiImageBuilderNamespace = "YATAI_IMAGE_BUILDER_NAMESPACE"
EnvYataiDeploymentNamespace = "YATAI_DEPLOYMENT_NAMESPACE"
EnvBentoDeploymentNamespaces = "BENTO_DEPLOYMENT_NAMESPACES"
EnvImageBuildersNamespace = "IMAGE_BUILDERS_NAMESPACE"
// Cloud credential settings.
EnvAWSAccessKeyID = "AWS_ACCESS_KEY_ID"
EnvGCPAccessKeyID = "GCP_ACCESS_KEY_ID"
EnvAWSSecretAccessKey = "AWS_SECRET_ACCESS_KEY"
EnvGCPSecretAccessKey = "GCP_SECRET_ACCESS_KEY"
EnvAWSECRWithIAMRole = "AWS_ECR_WITH_IAM_ROLE"
EnvAWSECRRegion = "AWS_ECR_REGION"
)
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package consts
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
)
// Kubernetes-related string constants: label and annotation keys, default
// namespaces, resource/event kind names, ConfigMap names and keys, and Secret
// names.
const (
KubeIngressClassName = "yatai-ingress"
// Label keys.
KubeLabelYataiSelector = "yatai.ai/selector"
KubeLabelYataiBentoRepository = "yatai.ai/bento-repository"
KubeLabelYataiBento = "yatai.ai/bento"
KubeLabelYataiModelRepository = "yatai.ai/model-repository"
KubeLabelYataiModel = "yatai.ai/model"
// HPA metric names.
KubeHPAQPSMetric = "http_request"
KubeHPAGPUMetric = "container_accelerator_duty_cycle"
// Default namespaces; package config falls back to these when neither the
// environment nor the shared-env secret supplies a value.
DefaultKubeNamespaceBentoDeployment = "yatai"
DefaultKubeNamespaceImageBuilders = "yatai-builders"
DefaultKubeNamespaceYataiSystem = "yatai-system"
DefaultKubeNamespaceYataiImageBuilderComponent = "yatai-image-builder"
DefaultKubeNamespaceYataiDeploymentComponent = "yatai-deployment"
KubeLabelYataiBentoDeployment = "yatai.ai/bento-deployment"
KubeLabelYataiBentoDeploymentComponentType = "yatai.ai/bento-deployment-component-type"
KubeLabelYataiBentoDeploymentComponentName = "yatai.ai/bento-deployment-component-name"
KubeLabelYataiBentoDeploymentTargetType = "yatai.ai/bento-deployment-target-type"
KubeLabelYataiBentoDeploymentRunner = "yatai.ai/bento-deployment-runner"
// KubeLabelBentoRepository has the same value as KubeLabelYataiBentoRepository
// and KubeAnnotationBentoRepository.
KubeLabelBentoRepository = "yatai.ai/bento-repository"
KubeLabelBentoVersion = "yatai.ai/bento-version"
KubeLabelCreator = "yatai.ai/creator"
// nolint: gosec
KubeLabelYataiDeployToken = "yatai.ai/deploy-token"
KubeLabelIsBentoImageBuilder = "yatai.ai/is-bento-image-builder"
KubeLabelIsModelSeeder = "yatai.ai/is-model-seeder"
KubeLabelBentoRequest = "yatai.ai/bento-request"
KubeLabelYataiOwnerReference = "yatai.ai/owner-reference"
KubeLabelGPUAccelerator = "gpu-accelerator"
KubeLabelHostName = "kubernetes.io/hostname"
KubeLabelArch = "kubernetes.io/arch"
KubeLabelValueFalse = "false"
KubeLabelValueTrue = "true"
KubeLabelYataiImageBuilderPod = "yatai.ai/yatai-image-builder-pod"
KubeLabelBentoDeploymentPod = "yatai.ai/bento-deployment-pod"
KubeLabelManagedBy = "app.kubernetes.io/managed-by"
KubeLabelHelmHeritage = "heritage"
KubeLabelHelmRelease = "release"
// Annotation keys.
KubeAnnotationBentoRepository = "yatai.ai/bento-repository"
KubeAnnotationBentoVersion = "yatai.ai/bento-version"
KubeAnnotationYataiDeploymentId = "yatai.ai/deployment-id"
KubeAnnotationDockerRegistryInsecure = "yatai.ai/docker-registry-insecure"
KubeAnnotationHelmReleaseName = "meta.helm.sh/release-name"
KubeAnnotationPrometheusScrape = "prometheus.io/scrape"
KubeAnnotationPrometheusPort = "prometheus.io/port"
KubeAnnotationPrometheusPath = "prometheus.io/path"
KubeAnnotationARMSAutoEnable = "armsPilotAutoEnable"
KubeAnnotationARMSAppName = "armsPilotCreateAppName"
KubeAnnotationYataiImageBuilderSeparateModels = "yatai.ai/yatai-image-builder-separate-models"
KubeAnnotationAWSAccessKeySecretName = "yatai.ai/aws-access-key-secret-name"
KubeAnnotationGCPAccessKeySecretName = "yatai.ai/gcp-access-key-secret"
KubeAnnotationIsMultiTenancy = "yatai.ai/is-multi-tenancy"
KubeCreator = "yatai"
KubeResourceGPUNvidia = "nvidia.com/gpu"
// Resource kind names used with events.
KubeEventResourceKindPod = "Pod"
KubeEventResourceKindHPA = "HorizontalPodAutoscaler"
KubeEventResourceKindReplicaSet = "ReplicaSet"
// The dedicated-node-group taint key and label key share one value.
KubeTaintKeyDedicatedNodeGroup = "mcd.io/dedicated-node-group"
KubeLabelDedicatedNodeGroup = "mcd.io/dedicated-node-group"
KubeImageCSIDriver = "image.csi.k8s.io"
KubeImageCSIDriverWarmMetal = "csi-image.warm-metal.tech"
// Network ConfigMap name and keys.
KubeConfigMapNameNetworkConfig = "network"
KubeConfigMapKeyNetworkConfigDomainSuffix = "domain-suffix"
KubeConfigMapKeyNetworkConfigIngressClass = "ingress-class"
KubeConfigMapKeyNetworkConfigIngressAnnotations = "ingress-annotations"
KubeConfigMapKeyNetworkConfigIngressPath = "ingress-path"
KubeConfigMapKeyNetworkConfigIngressPathType = "ingress-path-type"
// Yatai ConfigMap name and keys.
KubeConfigMapNameYataiConfig = "yatai"
KubeConfigMapKeyYataiConfigEndpoint = "endpoint"
KubeConfigMapKeyYataiConfigClusterName = "cluster-name"
KubeConfigMapKeyYataiConfigApiTokenSecretName = "api-token-secret-name"
KubeConfigMapKeyYataiConfigApiTokenSecretKey = "api-token-secret-key"
// Secret names. The shared-env, common-env and per-component env secrets are
// the ones package config looks up.
// NOTE(review): the two deployment secret names use a "compoundai-" prefix
// while the others use "yatai-" — confirm the mix is intended.
// nolint: gosec
KubeSecretNameRegcred = "yatai-regcred"
KubeSecretNameYataiCommonEnv = "yatai-common-env"
KubeSecretNameYataiImageBuilderSharedEnv = "yatai-image-builder-shared-env"
KubeSecretNameYataiDeploymentSharedEnv = "compoundai-deployment-shared-env"
KubeSecretNameYataiImageBuilderEnv = "yatai-image-builder-env"
KubeSecretNameYataiDeploymentEnv = "compoundai-deployment-env"
)
// KubeListEverything is a metav1.ListOptions whose label and field selectors
// are the string forms of labels.Everything() and fields.Everything().
var KubeListEverything = metav1.ListOptions{
LabelSelector: labels.Everything().String(),
FieldSelector: fields.Everything().String(),
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package reqcli
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"sync"
"time"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
var (
// dftHttpCli is the shared client returned by GetDefaultHttpClient; it stays
// nil until that function runs for the first time.
dftHttpCli *http.Client
// cliLoadOnce ensures dftHttpCli is constructed only once.
cliLoadOnce sync.Once
)
// httpTimeout is the Timeout given to the shared default client.
var httpTimeout = 90 * time.Second
// getDefaultTransPort returns a freshly allocated transport configured with
// this package's dial, idle-connection and TLS-handshake settings. Each call
// returns a new *http.Transport with its own connection pool.
//
// The deprecated net.Dialer.DualStack field is intentionally not set: dual-stack
// "fast fallback" dialing has been the default since Go 1.12, so the field was
// a no-op.
func getDefaultTransPort() *http.Transport {
	dialer := &net.Dialer{
		Timeout:   30 * time.Second,
		KeepAlive: 60 * time.Second,
	}
	return &http.Transport{
		Proxy:                 http.ProxyFromEnvironment,
		DialContext:           dialer.DialContext,
		ForceAttemptHTTP2:     true,
		MaxIdleConns:          100,
		IdleConnTimeout:       180 * time.Second,
		TLSHandshakeTimeout:   10 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
		// Server certificates are NOT verified by clients using this transport.
		// nolint: gosec
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}
}
func GetDefaultHttpClient() *http.Client {
cliLoadOnce.Do(func() {
dftHttpCli = &http.Client{
Timeout: httpTimeout,
Transport: getDefaultTransPort(),
}
})
return dftHttpCli
}
// NewHttpCli returns the shared default client from GetDefaultHttpClient; it
// does not allocate a new client. The returned error is always nil.
func NewHttpCli() (*http.Client, error) {
return GetDefaultHttpClient(), nil
}
// NewHttpCliWithTimeout returns a new client with the given timeout and its own
// freshly built default transport. The returned error is always nil.
func NewHttpCliWithTimeout(timeout time.Duration) (*http.Client, error) {
	return &http.Client{
		Transport: getDefaultTransPort(),
		Timeout:   timeout,
	}, nil
}
// JsonRequestBuilder collects the pieces of an HTTP request through chained
// setter calls. Every setter returns the receiver so calls can be chained.
type JsonRequestBuilder struct {
	// timeout is nil unless Timeout was called.
	timeout *time.Duration
	method  string
	url     string
	query   map[string]string
	headers map[string]string
	// payload is the request body source; nil means no body.
	payload interface{}
	// result is the destination the response body is decoded into; nil skips decoding.
	result interface{}
	// reqProcessors are hooks, kept in registration order.
	reqProcessors []func(req *http.Request)
}

// NewJsonRequestBuilder returns an empty builder.
func NewJsonRequestBuilder() *JsonRequestBuilder {
	return new(JsonRequestBuilder)
}

// Timeout records a per-request timeout.
func (b *JsonRequestBuilder) Timeout(timeout time.Duration) *JsonRequestBuilder {
	d := timeout
	b.timeout = &d
	return b
}

// Method records the HTTP method.
func (b *JsonRequestBuilder) Method(method string) *JsonRequestBuilder {
	b.method = method
	return b
}

// Url records the request URL.
func (b *JsonRequestBuilder) Url(url string) *JsonRequestBuilder {
	b.url = url
	return b
}

// Query records the query parameters. The map is stored as given, not copied,
// and replaces any previously recorded map.
func (b *JsonRequestBuilder) Query(query map[string]string) *JsonRequestBuilder {
	b.query = query
	return b
}

// Headers records the request headers. The map is stored as given, not copied,
// and replaces any previously recorded map.
func (b *JsonRequestBuilder) Headers(headers map[string]string) *JsonRequestBuilder {
	b.headers = headers
	return b
}

// Payload records the request payload.
func (b *JsonRequestBuilder) Payload(payload interface{}) *JsonRequestBuilder {
	b.payload = payload
	return b
}

// Result records the destination for the decoded response.
func (b *JsonRequestBuilder) Result(result interface{}) *JsonRequestBuilder {
	b.result = result
	return b
}

// ProcessReq appends a hook to the list of request processors; hooks
// accumulate across calls.
func (b *JsonRequestBuilder) ProcessReq(processor func(req *http.Request)) *JsonRequestBuilder {
	b.reqProcessors = append(b.reqProcessors, processor)
	return b
}
// Do sends the request described by the builder and, on success, decodes the
// response body into the value registered with Result.
//
// A nil payload sends no body, an io.Reader payload is sent as-is, and any
// other payload is JSON-encoded. Content-Type is set to application/json and
// may be overridden through Headers or a ProcessReq hook. Query parameters are
// added to whatever query string the URL already carries.
//
// statusCode is the HTTP status once a response has been received (zero
// otherwise). Any status other than 200 is reported as an error that includes
// the response body. Every returned error is wrapped with the request method
// and URL.
//
// A timeout set with Timeout applies to this request only: it is put on a
// shallow copy of the shared default client, so the shared client is never
// mutated and the connection pool is still reused.
func (b *JsonRequestBuilder) Do(ctx context.Context) (statusCode int, err error) {
	// Annotate every failure path with the request identity in one place.
	defer func() {
		if err != nil {
			err = errors.Wrapf(err, "DoJsonRequest Error: [%s]%s", b.method, b.url)
		}
	}()

	var reqBody io.Reader
	switch p := b.payload.(type) {
	case nil:
		// No payload: the request is sent without a body.
	case io.Reader:
		reqBody = p
	default:
		var data []byte
		if data, err = json.Marshal(p); err != nil {
			return statusCode, err
		}
		reqBody = bytes.NewBuffer(data)
	}

	var req *http.Request
	req, err = http.NewRequestWithContext(ctx, b.method, b.url, reqBody)
	if err != nil {
		return statusCode, err
	}

	// Default first, so caller-supplied headers and hooks can override it.
	req.Header.Set("Content-Type", "application/json")
	for k, v := range b.headers {
		req.Header.Set(k, v)
	}
	for _, reqProcessor := range b.reqProcessors {
		reqProcessor(req)
	}

	q := req.URL.Query()
	for key, value := range b.query {
		q.Add(key, value)
	}
	req.URL.RawQuery = q.Encode()

	cli := GetDefaultHttpClient()
	if b.timeout != nil {
		// The default client is a process-wide singleton. Writing the timeout
		// onto it would race with concurrent requests and leak this request's
		// timeout into all later ones, so use a copy that shares the transport.
		scoped := *cli
		scoped.Timeout = *b.timeout
		cli = &scoped
	}

	logrus.Debugf("http %s %s", b.method, b.url)
	var resp *http.Response
	resp, err = cli.Do(req)
	if err != nil {
		return statusCode, err
	}
	statusCode = resp.StatusCode
	defer func() {
		_ = resp.Body.Close()
	}()

	var respBody []byte
	respBody, err = io.ReadAll(resp.Body)
	if err != nil {
		logrus.Errorf("resp.Body error, %s", err)
		return statusCode, err
	}

	if resp.StatusCode != http.StatusOK {
		msg := fmt.Sprintf("%s %s status=%d, %s", b.method, b.url, resp.StatusCode, respBody)
		logrus.Error(msg)
		err = errors.New(msg)
		return statusCode, err
	}

	if b.result != nil {
		// errors.Wrap returns nil when Unmarshal succeeds.
		err = errors.Wrap(json.Unmarshal(respBody, b.result), "json unmarshal")
	}
	return statusCode, err
}
// DoJsonRequest is a convenience wrapper that configures a JsonRequestBuilder
// with the given method, URL, headers, payload and result destination, runs
// it, and returns only the error (the status code is discarded).
func DoJsonRequest(ctx context.Context, method string, url string, headers map[string]string, payload, result interface{}) (err error) {
	builder := NewJsonRequestBuilder().
		Method(method).
		Url(url).
		Headers(headers).
		Payload(payload).
		Result(result)
	_, err = builder.Do(ctx)
	return err
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package system
import (
"os"
"sync"
"github.com/sirupsen/logrus"
)
const (
// NamespaceEnvKey is the environment variable that specifies the system namespace.
NamespaceEnvKey = "SYSTEM_NAMESPACE"
// ResourceLabelEnvKey is the environment variable that specifies the system resource
// label.
ResourceLabelEnvKey = "SYSTEM_RESOURCE_LABEL"
// DefaultNamespace is returned by GetNamespace when NamespaceEnvKey is unset or empty.
DefaultNamespace = "yatai-deployment"
// MagicDNSEnvKey is the environment variable read by GetMagicDNS.
MagicDNSEnvKey = "MAGIC_DNS"
// DefaultMagicDNS is returned by GetMagicDNS when MagicDNSEnvKey is unset or empty.
DefaultMagicDNS = "sslip.io"
)
var (
// once ensures GetNamespace logs its "using default namespace" message at
// most one time per process.
once sync.Once
)
// GetNamespace reports the Kubernetes namespace the system components run in.
// The value of the NamespaceEnvKey environment variable wins when it is
// non-empty; otherwise DefaultNamespace is returned and the fallback is logged
// the first time it happens.
func GetNamespace() string {
	configured := os.Getenv(NamespaceEnvKey)
	if configured == "" {
		// Log the fallback only once, no matter how often we are called.
		once.Do(func() {
			logrus.Infof("%s environment variable not set, using default namespace %s", NamespaceEnvKey, DefaultNamespace)
		})
		return DefaultNamespace
	}
	return configured
}
// GetResourceLabel reports the label key that identifies the K8s objects our
// system components source their configuration from. It is read from the
// ResourceLabelEnvKey environment variable and is empty when that variable is
// not set.
func GetResourceLabel() string {
	label := os.Getenv(ResourceLabelEnvKey)
	return label
}
// GetMagicDNS returns the value of the MagicDNSEnvKey environment variable,
// or DefaultMagicDNS when that variable is unset or empty.
func GetMagicDNS() string {
	if configured := os.Getenv(MagicDNSEnvKey); configured != "" {
		return configured
	}
	return DefaultMagicDNS
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package system
import (
"context"
"encoding/json"
"fmt"
"net"
"os"
"strings"
"time"
"github.com/pkg/errors"
"github.com/rs/xid"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
networkingv1 "k8s.io/api/networking/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"github.com/dynemo-ai/dynemo/deploy/compoundai/operator/pkg/compoundai/consts"
)
// IngressConfig holds the ingress settings that GetIngressConfig reads from
// the network-config ConfigMap.
type IngressConfig struct {
// ClassName is the ingress class; nil when the ConfigMap value is empty.
ClassName *string
// Annotations are decoded from a JSON object stored in the ConfigMap; an
// empty (non-nil) map when the ConfigMap value is empty.
Annotations map[string]string
// Path is the ingress path; GetIngressConfig defaults it to "/".
Path string
// PathType is the ingress path type; GetIngressConfig defaults it to
// networkingv1.PathTypeImplementationSpecific.
PathType networkingv1.PathType
}
// GetIngressConfig loads the network-config ConfigMap through configmapGetter
// and turns its ingress-related entries into an IngressConfig.
//
// Every value is whitespace-trimmed before use. An empty ingress class leaves
// ClassName nil, an empty annotations entry yields an empty map, an empty path
// becomes "/", and an empty path type becomes
// networkingv1.PathTypeImplementationSpecific. The annotations entry, when
// present, must be a JSON object of string values.
//
// A nil config and a wrapped error are returned when the ConfigMap cannot be
// fetched or the annotations cannot be decoded.
func GetIngressConfig(ctx context.Context, configmapGetter func(ctx context.Context, namespace, name string) (*corev1.ConfigMap, error)) (ingressConfig *IngressConfig, err error) {
	networkConfig, getErr := GetNetworkConfigConfigMap(ctx, configmapGetter)
	if getErr != nil {
		return nil, errors.Wrapf(getErr, "failed to get configmap %s", consts.KubeConfigMapNameNetworkConfig)
	}

	// setting returns the trimmed ConfigMap value stored under key.
	setting := func(key string) string {
		return strings.TrimSpace(networkConfig.Data[key])
	}

	// Start from the defaults and override whatever the ConfigMap provides.
	cfg := &IngressConfig{
		Annotations: make(map[string]string),
		Path:        "/",
		PathType:    networkingv1.PathTypeImplementationSpecific,
	}

	if class := setting(consts.KubeConfigMapKeyNetworkConfigIngressClass); class != "" {
		cfg.ClassName = &class
	}

	if rawAnnotations := setting(consts.KubeConfigMapKeyNetworkConfigIngressAnnotations); rawAnnotations != "" {
		if jsonErr := json.Unmarshal([]byte(rawAnnotations), &cfg.Annotations); jsonErr != nil {
			return nil, errors.Wrapf(jsonErr, "failed to json unmarshal %s in configmap %s: %s", consts.KubeConfigMapKeyNetworkConfigIngressAnnotations, consts.KubeConfigMapNameNetworkConfig, rawAnnotations)
		}
	}

	if configuredPath := setting(consts.KubeConfigMapKeyNetworkConfigIngressPath); configuredPath != "" {
		cfg.Path = configuredPath
	}

	if configuredType := setting(consts.KubeConfigMapKeyNetworkConfigIngressPathType); configuredType != "" {
		cfg.PathType = networkingv1.PathType(configuredType)
	}

	return cfg, nil
}
// GetIngressIP discovers the address of the cluster's ingress controller by
// creating a throw-away Ingress in the system namespace, waiting until its
// status reports a load-balancer entry, and returning that entry's address.
//
// The ingress class and annotations come from GetIngressConfig. The temporary
// Ingress is created with a generated name (prefix "default-domain-") and is
// deleted again before the function returns. The host of its single rule is
// derived from the POD_NAME environment variable, or from a random id when
// that variable is empty.
//
// If the load-balancer entry carries an IP it is returned as-is; otherwise its
// hostname is resolved to an IPv4 address. An error is returned when the
// ingress config cannot be loaded, the Ingress cannot be created, it does not
// become ready within the wait timeout, or no address can be determined.
func GetIngressIP(ctx context.Context, configmapGetter func(ctx context.Context, namespace, name string) (*corev1.ConfigMap, error), cliset *kubernetes.Clientset) (ip string, err error) {
	ingressConfig, err := GetIngressConfig(ctx, configmapGetter)
	if err != nil {
		err = errors.Wrapf(err, "failed to get ingress config")
		return
	}

	ingressClassName := ingressConfig.ClassName
	ingressAnnotations := ingressConfig.Annotations

	namespace := GetNamespace()
	ingressCli := cliset.NetworkingV1().Ingresses(namespace)

	ingName := "default-domain-"
	pathType := networkingv1.PathTypeImplementationSpecific

	podName := os.Getenv("POD_NAME")
	if podName == "" {
		// No pod name available: fall back to a random, lower-case identifier.
		guid := xid.New()
		podName = fmt.Sprintf("a%s", strings.ToLower(guid.String()))
	}

	logrus.Infof("Creating ingress %s to get a ingress IP automatically", ingName)

	ing, err := ingressCli.Create(ctx, &networkingv1.Ingress{
		ObjectMeta: metav1.ObjectMeta{
			GenerateName: ingName,
			Namespace:    namespace,
			Annotations:  ingressAnnotations,
		},
		Spec: networkingv1.IngressSpec{
			IngressClassName: ingressClassName,
			Rules: []networkingv1.IngressRule{{
				Host: fmt.Sprintf("%s.this-is-yatai-in-order-to-generate-the-default-domain-suffix.yeah", podName),
				IngressRuleValue: networkingv1.IngressRuleValue{
					HTTP: &networkingv1.HTTPIngressRuleValue{
						Paths: []networkingv1.HTTPIngressPath{
							{
								Path:     "/",
								PathType: &pathType,
								Backend: networkingv1.IngressBackend{
									Service: &networkingv1.IngressServiceBackend{
										Name: "default-domain-service",
										Port: networkingv1.ServiceBackendPort{
											Number: consts.BentoServicePort,
										},
									},
								},
							},
						},
					},
				},
			}},
		},
	}, metav1.CreateOptions{})
	if err != nil {
		if k8serrors.IsAlreadyExists(err) {
			// The name is server-generated, so a collision leaves us without
			// a named object to poll or delete; continuing would only fail
			// later with a misleading "empty name" error.
			err = errors.Wrapf(err, "failed to create ingress %s: generated name already exists", ingName)
			return
		}
		err = errors.Wrapf(err, "failed to create ingress %s", ingName)
		return
	}

	// Remember the generated name up front. The object returned by a failed
	// Get carries no name, so cleanup and error messages must not depend on
	// whatever the polling loop last fetched.
	ingressName := ing.Name

	defer func() {
		// Best-effort cleanup of the temporary ingress.
		_ = ingressCli.Delete(ctx, ingressName, metav1.DeleteOptions{})
	}()

	// Interval to poll for objects.
	pollInterval := 10 * time.Second
	// How long to wait for objects.
	waitTimeout := 20 * time.Minute

	logrus.Infof("Waiting for ingress %s to be ready", ingressName)

	// Wait for the Ingress to be Ready.
	if err = wait.PollUntilContextTimeout(ctx, pollInterval, waitTimeout, false, func(ctx context.Context) (bool, error) {
		current, getErr := ingressCli.Get(ctx, ingressName, metav1.GetOptions{})
		if getErr != nil {
			// Stop polling on the first lookup failure.
			return true, getErr
		}
		ing = current
		return len(current.Status.LoadBalancer.Ingress) > 0, nil
	}); err != nil {
		err = errors.Wrapf(err, "failed to wait for ingress %s to be ready", ingressName)
		return
	}

	logrus.Infof("Ingress %s is ready", ingressName)

	address := ing.Status.LoadBalancer.Ingress[0]
	ip = address.IP
	if ip == "" {
		if address.Hostname == "" {
			err = errors.Errorf("the ingress %s status has no IP or hostname", ingressName)
			return
		}
		// The load balancer only exposes a hostname; resolve it to IPv4.
		var ipAddr *net.IPAddr
		ipAddr, err = net.ResolveIPAddr("ip4", address.Hostname)
		if err != nil {
			err = errors.Wrapf(err, "failed to resolve ip address for hostname %s", address.Hostname)
			return
		}
		ip = ipAddr.String()
	}
	return
}
// GetDomainSuffix returns the domain suffix to use for generated hostnames.
//
// If the network-config ConfigMap already contains a non-empty (trimmed)
// domain suffix, that value is returned unchanged. Otherwise a suffix of the
// form "<ingress-ip>.<magic-dns>" is built from GetIngressIP and GetMagicDNS,
// written back to the ConfigMap with a JSON merge patch, and returned.
//
// An error is returned when the ConfigMap cannot be fetched, the ingress IP
// cannot be determined, or the ConfigMap cannot be patched. In the last case
// the generated suffix is still returned alongside the error.
func GetDomainSuffix(ctx context.Context, configmapGetter func(ctx context.Context, namespace, name string) (*corev1.ConfigMap, error), cliset *kubernetes.Clientset) (domainSuffix string, err error) {
	configMap, err := GetNetworkConfigConfigMap(ctx, configmapGetter)
	if err != nil {
		err = errors.Wrapf(err, "failed to get configmap %s", consts.KubeConfigMapNameNetworkConfig)
		return
	}

	domainSuffix = strings.TrimSpace(configMap.Data[consts.KubeConfigMapKeyNetworkConfigDomainSuffix])
	if domainSuffix != "" {
		logrus.Infof("The %s in the network config has already set to `%s`", consts.KubeConfigMapKeyNetworkConfigDomainSuffix, domainSuffix)
		return
	}

	magicDNS := GetMagicDNS()

	var ip string
	ip, err = GetIngressIP(ctx, configmapGetter, cliset)
	if err != nil {
		return
	}

	domainSuffix = fmt.Sprintf("%s.%s", ip, magicDNS)

	logrus.Infof("you have not set the %s in the network config, so use magic DNS to generate a domain suffix automatically: `%s`, and set it to the network config", consts.KubeConfigMapKeyNetworkConfigDomainSuffix, domainSuffix)

	// Encode the patch with encoding/json rather than string formatting so a
	// quote or backslash in the environment-provided magic DNS value cannot
	// produce malformed JSON or inject extra fields into the patch.
	var patch []byte
	patch, err = json.Marshal(map[string]map[string]string{
		"data": {consts.KubeConfigMapKeyNetworkConfigDomainSuffix: domainSuffix},
	})
	if err != nil {
		err = errors.Wrapf(err, "failed to encode patch for configmap %s", consts.KubeConfigMapNameNetworkConfig)
		return
	}

	configMapCli := cliset.CoreV1().ConfigMaps(configMap.Namespace)
	_, err = configMapCli.Patch(ctx, configMap.Name, types.MergePatchType, patch, metav1.PatchOptions{})
	if err != nil {
		err = errors.Wrapf(err, "failed to patch configmap %s", consts.KubeConfigMapNameNetworkConfig)
		return
	}
	return
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package system
import (
"context"
corev1 "k8s.io/api/core/v1"
"github.com/dynemo-ai/dynemo/deploy/compoundai/operator/pkg/compoundai/consts"
)
// GetNetworkConfigConfigMap fetches the network-config ConfigMap by calling
// configmapGetter with the namespace reported by GetNamespace and the name
// consts.KubeConfigMapNameNetworkConfig. The getter's result and error are
// returned as-is.
func GetNetworkConfigConfigMap(ctx context.Context, configmapGetter func(ctx context.Context, namespace, name string) (*corev1.ConfigMap, error)) (configMap *corev1.ConfigMap, err error) {
	namespace := GetNamespace()
	return configmapGetter(ctx, namespace, consts.KubeConfigMapNameNetworkConfig)
}
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package utils
import (
"net/url"
"path"
)
// UrlJoin appends extra to the path of baseUrl and adds every key/value pair
// from params to the URL's query string, keeping any query parameters that
// baseUrl already carries.
//
// The path is combined with path.Join, so duplicate slashes are collapsed and
// a trailing slash is dropped. The query string is re-encoded, which sorts it
// by key. If baseUrl cannot be parsed it is returned unchanged.
func UrlJoin(baseUrl string, extra string, params ...map[string]string) string {
	parsed, parseErr := url.Parse(baseUrl)
	if parseErr != nil {
		return baseUrl
	}

	parsed.Path = path.Join(parsed.Path, extra)

	values := parsed.Query()
	for idx := range params {
		for name, val := range params[idx] {
			values.Add(name, val)
		}
	}
	parsed.RawQuery = values.Encode()

	return parsed.String()
}
// UrlJoinWithQuery appends extra to the path of baseUrl (using path.Join) and
// replaces the URL's query string with the encoded form of query. Any query
// parameters already present in baseUrl are discarded. If baseUrl cannot be
// parsed it is returned unchanged.
func UrlJoinWithQuery(baseUrl string, extra string, query url.Values) string {
	target, parseErr := url.Parse(baseUrl)
	if parseErr != nil {
		return baseUrl
	}

	target.Path, target.RawQuery = path.Join(target.Path, extra), query.Encode()
	return target.String()
}
......@@ -11,7 +11,7 @@ This directory contains configuration for visualizing metrics from the metrics a
1. Make sure Docker and Docker Compose are installed on your system
2. Start the `components/metrics` application to begin monitoring for metric events from dynemo workers
2. Start the `components/metrics` application to begin monitoring for metric events from dynamo workers
and aggregating them on a prometheus metrics endpoint: `http://localhost:9091/metrics`.
3. Start worker(s) that publishes KV Cache metrics.
......
......@@ -96,8 +96,8 @@ By default the server will run on port 8080.
Add model to the server:
```bash
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.tensorrt-llm.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.tensorrt-llm.completions
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynamo.tensorrt-llm.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynamo.tensorrt-llm.completions
```
#### 2. Workers
......@@ -220,8 +220,8 @@ By default the server will run on port 8080.
Add model to the server:
```bash
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.router.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.router.completions
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynamo.router.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynamo.router.completions
```
#### 2. Workers
......
......@@ -19,12 +19,12 @@ import asyncio
import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_worker
from dynamo.runtime import DistributedRuntime, dynamo_worker
from .protocol import Request
@dynemo_worker()
@dynamo_worker()
async def worker(
runtime: DistributedRuntime,
component: str,
......@@ -38,7 +38,7 @@ async def worker(
"""
# create client
client = (
await runtime.namespace("dynemo")
await runtime.namespace("dynamo")
.component(component)
.endpoint("generate")
.client()
......
......@@ -26,7 +26,7 @@ from common.protocol import (
from tensorrt_llm.logger import logger
from tensorrt_llm.serve.openai_protocol import CompletionRequest, DisaggregatedParams
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
from dynamo.runtime import DistributedRuntime, dynamo_endpoint, dynamo_worker
logger.set_level("debug")
......@@ -69,7 +69,7 @@ class Router:
# Disagg params should be in under the choices field in the response object.
# This is the case for completions but not for chat.
@dynemo_endpoint(CompletionRequest, DisaggCompletionStreamResponse)
@dynamo_endpoint(CompletionRequest, DisaggCompletionStreamResponse)
async def generate_completion(self, request):
# These settings are needed to satisfy request checks.
request.skip_special_tokens = False
......@@ -102,7 +102,7 @@ class Router:
)
yield json.loads(gen_resp_obj.model_dump_json(exclude_unset=True))
@dynemo_endpoint(DisaggChatCompletionRequest, DisaggChatCompletionStreamResponse)
@dynamo_endpoint(DisaggChatCompletionRequest, DisaggChatCompletionStreamResponse)
async def generate_chat(self, request):
# These settings are needed to satisfy request checks.
request.skip_special_tokens = False
......@@ -136,35 +136,35 @@ class Router:
yield json.loads(gen_resp_obj.model_dump_json(exclude_unset=True))
@dynemo_worker()
@dynamo_worker()
async def worker(runtime: DistributedRuntime):
"""
Instantiate a `backend` component and serve the `generate` endpoint
A `Component` can serve multiple endpoints
"""
component = runtime.namespace("dynemo").component("router")
component = runtime.namespace("dynamo").component("router")
await component.create_service()
ctx_completion_client = (
await runtime.namespace("dynemo")
await runtime.namespace("dynamo")
.component("tensorrt-llm-ctx")
.endpoint("completions")
.client()
)
gen_completion_client = (
await runtime.namespace("dynemo")
await runtime.namespace("dynamo")
.component("tensorrt-llm-gen")
.endpoint("completions")
.client()
)
ctx_chat_client = (
await runtime.namespace("dynemo")
await runtime.namespace("dynamo")
.component("tensorrt-llm-ctx")
.endpoint("chat/completions")
.client()
)
gen_chat_client = (
await runtime.namespace("dynemo")
await runtime.namespace("dynamo")
.component("tensorrt-llm-gen")
.endpoint("chat/completions")
.client()
......
......@@ -44,7 +44,7 @@ from tensorrt_llm.llmapi.disagg_utils import (
from tensorrt_llm.logger import logger
from tensorrt_llm.serve.openai_protocol import CompletionRequest
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
from dynamo.runtime import DistributedRuntime, dynamo_endpoint, dynamo_worker
logger.set_level("debug")
......@@ -85,7 +85,7 @@ class TensorrtLLMEngine(BaseTensorrtLLMEngine):
engine_config.extra_args["_mpi_session"] = self._mpi_session
super().__init__(engine_config)
@dynemo_endpoint(DisaggChatCompletionRequest, DisaggChatCompletionStreamResponse)
@dynamo_endpoint(DisaggChatCompletionRequest, DisaggChatCompletionStreamResponse)
async def generate_chat(self, request):
if self._llm_engine is None:
raise RuntimeError("Engine not initialized")
......@@ -164,7 +164,7 @@ class TensorrtLLMEngine(BaseTensorrtLLMEngine):
self._ongoing_request_count -= 1
@dynemo_endpoint(CompletionRequest, DisaggCompletionStreamResponse)
@dynamo_endpoint(CompletionRequest, DisaggCompletionStreamResponse)
async def generate_completions(self, request):
if self._llm_engine is None:
raise RuntimeError("Engine not initialized")
......@@ -211,7 +211,7 @@ class TensorrtLLMEngine(BaseTensorrtLLMEngine):
self._ongoing_request_count -= 1
@dynemo_worker()
@dynamo_worker()
async def worker(
runtime: DistributedRuntime,
engine_config: LLMAPIConfig,
......@@ -226,7 +226,7 @@ async def worker(
server_type = disagg_config.server_configs[instance_idx].type
logger.info(f"Starting {server_type} server")
component = runtime.namespace("dynemo").component(f"tensorrt-llm-{server_type}")
component = runtime.namespace("dynamo").component(f"tensorrt-llm-{server_type}")
await component.create_service()
completions_endpoint = component.endpoint("completions")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment