Unverified Commit 5b4e5496 authored by mohammedabdulwahhab's avatar mohammedabdulwahhab Committed by GitHub
Browse files

fix: deploy command should support passing config (#626)


Signed-off-by: mohammedabdulwahhab <furkhan324@berkeley.edu>
Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
Co-authored-by: Julien Mancuso <jmancuso@nvidia.com>
parent 2d746153
...@@ -89,6 +89,7 @@ async def create_deployment(deployment: CreateDeploymentSchema): ...@@ -89,6 +89,7 @@ async def create_deployment(deployment: CreateDeploymentSchema):
"ngc-organization": ownership["organization_id"], "ngc-organization": ownership["organization_id"],
"ngc-user": ownership["user_id"], "ngc-user": ownership["user_id"],
}, },
envs=deployment.envs,
) )
# Create response schema # Create response schema
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import Any, Dict from typing import Any, Dict, List, Optional
from kubernetes import client, config from kubernetes import client, config
...@@ -46,7 +46,11 @@ def create_custom_resource( ...@@ -46,7 +46,11 @@ def create_custom_resource(
def create_dynamo_deployment( def create_dynamo_deployment(
name: str, namespace: str, dynamo_nim: str, labels: Dict[str, str] name: str,
namespace: str,
dynamo_nim: str,
labels: Dict[str, str],
envs: Optional[List[Dict[str, str]]] = None,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Create a DynamoDeployment custom resource. Create a DynamoDeployment custom resource.
...@@ -56,6 +60,7 @@ def create_dynamo_deployment( ...@@ -56,6 +60,7 @@ def create_dynamo_deployment(
namespace: Target namespace namespace: Target namespace
dynamo_nim: Bento name and version (format: name:version) dynamo_nim: Bento name and version (format: name:version)
labels: Resource labels labels: Resource labels
envs: Optional list of environment variables
Returns: Returns:
Created deployment Created deployment
...@@ -64,7 +69,7 @@ def create_dynamo_deployment( ...@@ -64,7 +69,7 @@ def create_dynamo_deployment(
"apiVersion": "nvidia.com/v1alpha1", "apiVersion": "nvidia.com/v1alpha1",
"kind": "DynamoDeployment", "kind": "DynamoDeployment",
"metadata": {"name": name, "namespace": namespace, "labels": labels}, "metadata": {"name": name, "namespace": namespace, "labels": labels},
"spec": {"dynamoNim": dynamo_nim, "services": {}}, "spec": {"dynamoNim": dynamo_nim, "services": {}, "envs": envs if envs else []},
} }
return create_custom_resource( return create_custom_resource(
......
...@@ -43,6 +43,72 @@ spec: ...@@ -43,6 +43,72 @@ spec:
properties: properties:
dynamoNim: dynamoNim:
type: string type: string
envs:
items:
properties:
name:
type: string
value:
type: string
valueFrom:
properties:
configMapKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
properties:
apiVersion:
type: string
fieldPath:
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
properties:
containerName:
type: string
divisor:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
services: services:
additionalProperties: additionalProperties:
properties: properties:
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
package v1alpha1 package v1alpha1
import ( import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
) )
...@@ -36,6 +37,9 @@ type DynamoDeploymentSpec struct { ...@@ -36,6 +37,9 @@ type DynamoDeploymentSpec struct {
// if not set, the DynamoNimDeployment will be used as is // if not set, the DynamoNimDeployment will be used as is
// +kubebuilder:validation:Optional // +kubebuilder:validation:Optional
Services map[string]*DynamoNimDeployment `json:"services,omitempty"` Services map[string]*DynamoNimDeployment `json:"services,omitempty"`
// Environment variables to be set in the deployment
// +kubebuilder:validation:Optional
Envs []corev1.EnvVar `json:"envs,omitempty"`
} }
// DynamoDeploymentStatus defines the observed state of DynamoDeployment. // DynamoDeploymentStatus defines the observed state of DynamoDeployment.
......
//go:build !ignore_autogenerated //go:build !ignore_autogenerated
/* /*
* SPDX-FileCopyrightText: Copyright (c) 2022 Atalaya Tech. Inc
* SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2022 Atalaya Tech, Inc
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
...@@ -27,8 +27,8 @@ import ( ...@@ -27,8 +27,8 @@ import (
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/common"
"github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas" "github.com/ai-dynamo/dynamo/deploy/dynamo/operator/api/dynamo/schemas"
"k8s.io/api/autoscaling/v2" "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime" runtime "k8s.io/apimachinery/pkg/runtime"
) )
...@@ -84,7 +84,7 @@ func (in *BaseStatus) DeepCopyInto(out *BaseStatus) { ...@@ -84,7 +84,7 @@ func (in *BaseStatus) DeepCopyInto(out *BaseStatus) {
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -214,6 +214,13 @@ func (in *DynamoDeploymentSpec) DeepCopyInto(out *DynamoDeploymentSpec) { ...@@ -214,6 +214,13 @@ func (in *DynamoDeploymentSpec) DeepCopyInto(out *DynamoDeploymentSpec) {
(*out)[key] = outVal (*out)[key] = outVal
} }
} }
if in.Envs != nil {
in, out := &in.Envs, &out.Envs
*out = make([]v1.EnvVar, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
} }
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoDeploymentSpec. // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DynamoDeploymentSpec.
...@@ -231,7 +238,7 @@ func (in *DynamoDeploymentStatus) DeepCopyInto(out *DynamoDeploymentStatus) { ...@@ -231,7 +238,7 @@ func (in *DynamoDeploymentStatus) DeepCopyInto(out *DynamoDeploymentStatus) {
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -363,7 +370,7 @@ func (in *DynamoNimDeploymentSpec) DeepCopyInto(out *DynamoNimDeploymentSpec) { ...@@ -363,7 +370,7 @@ func (in *DynamoNimDeploymentSpec) DeepCopyInto(out *DynamoNimDeploymentSpec) {
} }
if in.Envs != nil { if in.Envs != nil {
in, out := &in.Envs, &out.Envs in, out := &in.Envs, &out.Envs
*out = make([]corev1.EnvVar, len(*in)) *out = make([]v1.EnvVar, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -408,12 +415,12 @@ func (in *DynamoNimDeploymentSpec) DeepCopyInto(out *DynamoNimDeploymentSpec) { ...@@ -408,12 +415,12 @@ func (in *DynamoNimDeploymentSpec) DeepCopyInto(out *DynamoNimDeploymentSpec) {
} }
if in.LivenessProbe != nil { if in.LivenessProbe != nil {
in, out := &in.LivenessProbe, &out.LivenessProbe in, out := &in.LivenessProbe, &out.LivenessProbe
*out = new(corev1.Probe) *out = new(v1.Probe)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.ReadinessProbe != nil { if in.ReadinessProbe != nil {
in, out := &in.ReadinessProbe, &out.ReadinessProbe in, out := &in.ReadinessProbe, &out.ReadinessProbe
*out = new(corev1.Probe) *out = new(v1.Probe)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.Replicas != nil { if in.Replicas != nil {
...@@ -438,7 +445,7 @@ func (in *DynamoNimDeploymentStatus) DeepCopyInto(out *DynamoNimDeploymentStatus ...@@ -438,7 +445,7 @@ func (in *DynamoNimDeploymentStatus) DeepCopyInto(out *DynamoNimDeploymentStatus
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -590,14 +597,14 @@ func (in *DynamoNimRequestSpec) DeepCopyInto(out *DynamoNimRequestSpec) { ...@@ -590,14 +597,14 @@ func (in *DynamoNimRequestSpec) DeepCopyInto(out *DynamoNimRequestSpec) {
} }
if in.ImageBuilderExtraContainerEnv != nil { if in.ImageBuilderExtraContainerEnv != nil {
in, out := &in.ImageBuilderExtraContainerEnv, &out.ImageBuilderExtraContainerEnv in, out := &in.ImageBuilderExtraContainerEnv, &out.ImageBuilderExtraContainerEnv
*out = make([]corev1.EnvVar, len(*in)) *out = make([]v1.EnvVar, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
} }
if in.ImageBuilderContainerResources != nil { if in.ImageBuilderContainerResources != nil {
in, out := &in.ImageBuilderContainerResources, &out.ImageBuilderContainerResources in, out := &in.ImageBuilderContainerResources, &out.ImageBuilderContainerResources
*out = new(corev1.ResourceRequirements) *out = new(v1.ResourceRequirements)
(*in).DeepCopyInto(*out) (*in).DeepCopyInto(*out)
} }
if in.OCIRegistryInsecure != nil { if in.OCIRegistryInsecure != nil {
...@@ -607,7 +614,7 @@ func (in *DynamoNimRequestSpec) DeepCopyInto(out *DynamoNimRequestSpec) { ...@@ -607,7 +614,7 @@ func (in *DynamoNimRequestSpec) DeepCopyInto(out *DynamoNimRequestSpec) {
} }
if in.DownloaderContainerEnvFrom != nil { if in.DownloaderContainerEnvFrom != nil {
in, out := &in.DownloaderContainerEnvFrom, &out.DownloaderContainerEnvFrom in, out := &in.DownloaderContainerEnvFrom, &out.DownloaderContainerEnvFrom
*out = make([]corev1.EnvFromSource, len(*in)) *out = make([]v1.EnvFromSource, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -629,7 +636,7 @@ func (in *DynamoNimRequestStatus) DeepCopyInto(out *DynamoNimRequestStatus) { ...@@ -629,7 +636,7 @@ func (in *DynamoNimRequestStatus) DeepCopyInto(out *DynamoNimRequestStatus) {
*out = *in *out = *in
if in.Conditions != nil { if in.Conditions != nil {
in, out := &in.Conditions, &out.Conditions in, out := &in.Conditions, &out.Conditions
*out = make([]v1.Condition, len(*in)) *out = make([]metav1.Condition, len(*in))
for i := range *in { for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i]) (*in)[i].DeepCopyInto(&(*out)[i])
} }
...@@ -663,7 +670,7 @@ func (in *DynamoNimSpec) DeepCopyInto(out *DynamoNimSpec) { ...@@ -663,7 +670,7 @@ func (in *DynamoNimSpec) DeepCopyInto(out *DynamoNimSpec) {
} }
if in.ImagePullSecrets != nil { if in.ImagePullSecrets != nil {
in, out := &in.ImagePullSecrets, &out.ImagePullSecrets in, out := &in.ImagePullSecrets, &out.ImagePullSecrets
*out = make([]corev1.LocalObjectReference, len(*in)) *out = make([]v1.LocalObjectReference, len(*in))
copy(*out, *in) copy(*out, *in)
} }
} }
......
...@@ -43,6 +43,72 @@ spec: ...@@ -43,6 +43,72 @@ spec:
properties: properties:
dynamoNim: dynamoNim:
type: string type: string
envs:
items:
properties:
name:
type: string
value:
type: string
valueFrom:
properties:
configMapKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
properties:
apiVersion:
type: string
fieldPath:
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
properties:
containerName:
type: string
divisor:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
properties:
key:
type: string
name:
default: ""
type: string
optional:
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
services: services:
additionalProperties: additionalProperties:
properties: properties:
......
...@@ -23,6 +23,7 @@ import ( ...@@ -23,6 +23,7 @@ import (
"strings" "strings"
"dario.cat/mergo" "dario.cat/mergo"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
...@@ -130,6 +131,13 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -130,6 +131,13 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
} }
} }
// Set common env vars on each of the dynamoNimDeployments
for _, deployment := range dynamoNimDeployments {
if len(dynamoDeployment.Spec.Envs) > 0 {
deployment.Spec.Envs = mergeEnvs(dynamoDeployment.Spec.Envs, deployment.Spec.Envs)
}
}
// reconcile the dynamoNimRequest // reconcile the dynamoNimRequest
dynamoNimRequest := &nvidiacomv1alpha1.DynamoNimRequest{ dynamoNimRequest := &nvidiacomv1alpha1.DynamoNimRequest{
ObjectMeta: metav1.ObjectMeta{ ObjectMeta: metav1.ObjectMeta{
...@@ -182,6 +190,27 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req ...@@ -182,6 +190,27 @@ func (r *DynamoDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Req
} }
// mergeEnvs combines two env-var lists into one. Entries from specific
// take precedence over same-named entries in common. The result's order
// is unspecified because it is produced by map iteration; callers that
// need a stable order must sort it themselves.
func mergeEnvs(common, specific []corev1.EnvVar) []corev1.EnvVar {
	byName := make(map[string]corev1.EnvVar, len(common)+len(specific))
	// Later writes win, so iterating common first and specific second
	// makes the service-specific variables override the shared ones.
	for _, src := range [][]corev1.EnvVar{common, specific} {
		for _, e := range src {
			byName[e.Name] = e
		}
	}
	result := make([]corev1.EnvVar, 0, len(byName))
	for _, e := range byName {
		result = append(result, e)
	}
	return result
}
// SetupWithManager sets up the controller with the Manager. // SetupWithManager sets up the controller with the Manager.
func (r *DynamoDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error { func (r *DynamoDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr). return ctrl.NewControllerManagedBy(mgr).
......
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package controller
import (
"reflect"
"sort"
"testing"
corev1 "k8s.io/api/core/v1"
)
// Test_mergeEnvs verifies mergeEnvs's contract: the result is the union
// of both lists, and when the same variable name appears in both, the
// service-specific entry wins over the common one.
func Test_mergeEnvs(t *testing.T) {
	type args struct {
		common   []corev1.EnvVar
		specific []corev1.EnvVar
	}
	tests := []struct {
		name string
		args args
		want []corev1.EnvVar // expected result, sorted by Name
	}{
		{
			name: "no_common_envs",
			args: args{
				common:   []corev1.EnvVar{},
				specific: []corev1.EnvVar{{Name: "FOO", Value: "BAR"}},
			},
			want: []corev1.EnvVar{{Name: "FOO", Value: "BAR"}},
		},
		{
			name: "no_specific_envs",
			args: args{
				common:   []corev1.EnvVar{{Name: "FOO", Value: "BAR"}},
				specific: []corev1.EnvVar{},
			},
			want: []corev1.EnvVar{{Name: "FOO", Value: "BAR"}},
		},
		{
			name: "common_and_specific_envs",
			args: args{
				specific: []corev1.EnvVar{{Name: "BAZ", Value: "QUX"}},
				common:   []corev1.EnvVar{{Name: "FOO", Value: "BAR"}},
			},
			want: []corev1.EnvVar{{Name: "BAZ", Value: "QUX"}, {Name: "FOO", Value: "BAR"}},
		},
		{
			// Same name in both lists: the specific value must override.
			name: "common_and_specific_envs_with_same_name",
			args: args{
				common:   []corev1.EnvVar{{Name: "FOO", Value: "BAR"}},
				specific: []corev1.EnvVar{{Name: "FOO", Value: "QUX"}},
			},
			want: []corev1.EnvVar{{Name: "FOO", Value: "QUX"}},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := mergeEnvs(tt.args.common, tt.args.specific)
			// mergeEnvs builds its result from a map, so its order is
			// unspecified; sort by name before comparing to want.
			sort.Slice(got, func(i, j int) bool {
				return got[i].Name < got[j].Name
			})
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("mergeEnvs() = %v, want %v", got, tt.want)
			}
		})
	}
}
...@@ -19,27 +19,24 @@ from __future__ import annotations ...@@ -19,27 +19,24 @@ from __future__ import annotations
import json import json
import logging import logging
import os
import typing as t import typing as t
from http import HTTPStatus from http import HTTPStatus
import bentoml.deployment
import click import click
import rich
import rich.style
import yaml
from bentoml._internal.cloud.base import Spinner from bentoml._internal.cloud.base import Spinner
from bentoml._internal.cloud.deployment import Deployment, DeploymentConfigParameters from bentoml._internal.cloud.deployment import Deployment, DeploymentConfigParameters
from bentoml._internal.cloud.schemas.modelschemas import DeploymentStrategy
from bentoml._internal.configuration.containers import BentoMLContainer from bentoml._internal.configuration.containers import BentoMLContainer
from bentoml._internal.utils import add_experimental_docstring from bentoml._internal.utils import add_experimental_docstring
from bentoml.exceptions import BentoMLException, CLIException from bentoml.exceptions import BentoMLException
from bentoml_cli.utils import BentoMLCommandGroup
from rich.console import Console from rich.console import Console
from rich.syntax import Syntax
from rich.table import Table
from simple_di import Provide, inject from simple_di import Provide, inject
from dynamo.sdk.lib.logging import configure_server_logging
from .utils import resolve_service_config
configure_server_logging()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
...@@ -60,23 +57,6 @@ def raise_deployment_config_error(err: BentoMLException, action: str) -> t.NoRet ...@@ -60,23 +57,6 @@ def raise_deployment_config_error(err: BentoMLException, action: str) -> t.NoRet
) from None ) from None
def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None:
if env is None:
return None
collected_envs: list[dict[str, str]] = []
if env:
for item in env:
if "=" in item:
name, value = item.split("=", 1)
else:
name = item
if name not in os.environ:
raise CLIException(f"Environment variable {name} not found")
value = os.environ[name]
collected_envs.append({"name": name, "value": value})
return collected_envs
@click.command(name="deploy") @click.command(name="deploy")
@click.argument( @click.argument(
"bento", "bento",
...@@ -89,50 +69,6 @@ def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None: ...@@ -89,50 +69,6 @@ def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None:
type=click.STRING, type=click.STRING,
help="Deployment name", help="Deployment name",
) )
@click.option(
"--cluster",
type=click.STRING,
help="Name of the cluster",
)
@click.option(
"--access-authorization",
type=click.BOOL,
help="Enable access authorization",
)
@click.option(
"--scaling-min",
type=click.INT,
help="Minimum scaling value",
)
@click.option(
"--scaling-max",
type=click.INT,
help="Maximum scaling value",
)
@click.option(
"--instance-type",
type=click.STRING,
help="Type of instance",
)
@click.option(
"--strategy",
type=click.Choice(
[deployment_strategy.value for deployment_strategy in DeploymentStrategy]
),
help="Deployment strategy",
)
@click.option(
"--env",
type=click.STRING,
help="List of environment variables pass by --env key[=value] --env ...",
multiple=True,
)
@click.option(
"--secret",
type=click.STRING,
help="List of secret names pass by --secret name1, --secret name2, ...",
multiple=True,
)
@click.option( @click.option(
"-f", "-f",
"--config-file", "--config-file",
...@@ -140,12 +76,6 @@ def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None: ...@@ -140,12 +76,6 @@ def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None:
help="Configuration file path", help="Configuration file path",
default=None, default=None,
) )
@click.option(
"--config-dict",
type=click.STRING,
help="Configuration json string",
default=None,
)
@click.option( @click.option(
"--wait/--no-wait", "--wait/--no-wait",
type=click.BOOL, type=click.BOOL,
...@@ -159,20 +89,13 @@ def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None: ...@@ -159,20 +89,13 @@ def convert_env_to_dict(env: tuple[str] | None) -> list[dict[str, str]] | None:
default=3600, default=3600,
help="Timeout for deployment to be ready in seconds", help="Timeout for deployment to be ready in seconds",
) )
@click.pass_context
@add_experimental_docstring @add_experimental_docstring
def deploy_command( def deploy_command(
ctx: click.Context,
bento: str | None, bento: str | None,
name: str | None, name: str | None,
cluster: str | None,
access_authorization: bool | None,
scaling_min: int | None,
scaling_max: int | None,
instance_type: str | None,
strategy: str | None,
env: tuple[str] | None,
secret: tuple[str] | None,
config_file: str | t.TextIO | None, config_file: str | t.TextIO | None,
config_dict: str | None,
wait: bool, wait: bool,
timeout: int, timeout: int,
) -> None: ) -> None:
...@@ -184,300 +107,19 @@ def deploy_command( ...@@ -184,300 +107,19 @@ def deploy_command(
create_deployment( create_deployment(
bento=bento, bento=bento,
name=name, name=name,
cluster=cluster,
access_authorization=access_authorization,
scaling_min=scaling_min,
scaling_max=scaling_max,
instance_type=instance_type,
strategy=strategy,
env=env,
secret=secret,
config_file=config_file, config_file=config_file,
config_dict=config_dict,
wait=wait, wait=wait,
timeout=timeout, timeout=timeout,
args=ctx.args,
) )
# Reusable option shared by read-only subcommands: lets the caller pick
# the serialization format of the command's output (YAML by default).
output_option = click.option(
    "-o",
    "--output",
    type=click.Choice(["yaml", "json"]),
    default="yaml",
    help="Display the output of this command.",
)
def shared_decorator(
    f: t.Callable[..., t.Any] | None = None,
) -> t.Callable[..., t.Any]:
    """Attach the options shared by deployment subcommands (``--cluster``).

    Works both as a bare decorator (``@shared_decorator``) and as a
    called one (``@shared_decorator()``).
    """

    def decorate(func: t.Callable[..., t.Any]) -> t.Callable[..., t.Any]:
        cluster_option = click.option(
            "--cluster",
            type=click.STRING,
            default=None,
            help="Name of the cluster.",
        )
        return cluster_option(func)

    # Bare form passes the target function directly; called form gets
    # the decorator back to apply later.
    return decorate(f) if f else decorate
def build_deployment_command() -> click.Group: def build_deployment_command() -> click.Group:
@click.group(name="deployment", cls=BentoMLCommandGroup) @click.group(name="deployment")
@add_experimental_docstring @add_experimental_docstring
def deployment_command(): def deployment_command():
"""Deploy Dynamo applications to Kubernetes cluster""" """Deploy Dynamo applications to Kubernetes cluster"""
@deployment_command.command()
@shared_decorator()
@click.argument(
"name",
type=click.STRING,
required=False,
)
@click.option(
"--bento",
type=click.STRING,
help="Bento name or path to Bento project directory",
)
@click.option(
"--access-authorization",
type=click.BOOL,
help="Enable access authorization",
)
@click.option(
"--scaling-min",
type=click.INT,
help="Minimum scaling value",
)
@click.option(
"--scaling-max",
type=click.INT,
help="Maximum scaling value",
)
@click.option(
"--instance-type",
type=click.STRING,
help="Type of instance",
)
@click.option(
"--strategy",
type=click.Choice(
[deployment_strategy.value for deployment_strategy in DeploymentStrategy]
),
help="Deployment strategy",
)
@click.option(
"--env",
type=click.STRING,
help="List of environment variables pass by --env key[=value] --env ...",
multiple=True,
)
@click.option(
"--secret",
type=click.STRING,
help="List of secret names pass by --secret name1, --secret name2, ...",
multiple=True,
)
@click.option(
"-f",
"--config-file",
type=click.File(),
help="Configuration file path, mututally exclusive with other config options",
default=None,
)
@click.option(
"--config-dict",
type=click.STRING,
help="Configuration json string",
default=None,
)
@inject
def update( # type: ignore
name: str | None,
cluster: str | None,
bento: str | None,
access_authorization: bool | None,
scaling_min: int | None,
scaling_max: int | None,
instance_type: str | None,
strategy: str | None,
env: tuple[str] | None,
secret: tuple[str] | None,
config_file: t.TextIO | None,
config_dict: str | None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> None:
"""Update a deployment on BentoCloud.
\b
A deployment can be updated using parameters, or using config yaml file.
You can also update bento by providing a project path or existing bento.
"""
cfg_dict = None
if config_dict is not None and config_dict != "":
cfg_dict = json.loads(config_dict)
config_params = DeploymentConfigParameters(
name=name,
bento=bento,
cluster=cluster,
access_authorization=access_authorization,
scaling_max=scaling_max,
scaling_min=scaling_min,
instance_type=instance_type,
strategy=strategy,
envs=convert_env_to_dict(env),
secrets=list(secret) if secret is not None else None,
config_file=config_file,
config_dict=cfg_dict,
cli=True,
)
try:
config_params.verify(create=False)
except BentoMLException as e:
raise_deployment_config_error(e, "update")
deployment_info = _cloud_client.deployment.update(
deployment_config_params=config_params
)
rich.print(
f"Deployment [green]'{deployment_info.name}'[/] updated successfully."
)
@deployment_command.command()
@click.argument(
"bento",
type=click.STRING,
required=False,
)
@click.option(
"-n",
"--name",
type=click.STRING,
help="Deployment name",
)
@click.option(
"--cluster",
type=click.STRING,
help="Name of the cluster",
)
@click.option(
"--access-authorization",
type=click.BOOL,
help="Enable access authorization",
)
@click.option(
"--scaling-min",
type=click.INT,
help="Minimum scaling value",
)
@click.option(
"--scaling-max",
type=click.INT,
help="Maximum scaling value",
)
@click.option(
"--instance-type",
type=click.STRING,
help="Type of instance",
)
@click.option(
"--strategy",
type=click.Choice(
[deployment_strategy.value for deployment_strategy in DeploymentStrategy]
),
help="Deployment strategy",
)
@click.option(
"--env",
type=click.STRING,
help="List of environment variables pass by --env key[=value] --env ...",
multiple=True,
)
@click.option(
"--secret",
type=click.STRING,
help="List of secret names pass by --secret name1, --secret name2, ...",
multiple=True,
)
@click.option(
"-f",
"--config-file",
type=click.File(),
help="Configuration file path",
default=None,
)
@click.option(
"-f",
"--config-file",
help="Configuration file path, mututally exclusive with other config options",
default=None,
)
@click.option(
"--config-dict",
type=click.STRING,
help="Configuration json string",
default=None,
)
@inject
def apply( # type: ignore
bento: str | None,
name: str | None,
cluster: str | None,
access_authorization: bool | None,
scaling_min: int | None,
scaling_max: int | None,
instance_type: str | None,
strategy: str | None,
env: tuple[str] | None,
secret: tuple[str] | None,
config_file: str | t.TextIO | None,
config_dict: str | None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> None:
"""Apply a deployment on BentoCloud.
\b
A deployment can be applied using config yaml file.
"""
cfg_dict = None
if config_dict is not None and config_dict != "":
cfg_dict = json.loads(config_dict)
config_params = DeploymentConfigParameters(
name=name,
bento=bento,
cluster=cluster,
access_authorization=access_authorization,
scaling_max=scaling_max,
scaling_min=scaling_min,
instance_type=instance_type,
strategy=strategy,
envs=convert_env_to_dict(env),
secrets=list(secret) if secret is not None else None,
config_file=config_file,
config_dict=cfg_dict,
cli=True,
)
try:
config_params.verify(create=False)
except BentoMLException as e:
raise_deployment_config_error(e, "apply")
deployment_info = _cloud_client.deployment.apply(
deployment_config_params=config_params
)
rich.print(
f"Deployment [green]'{deployment_info.name}'[/] applied successfully."
)
@deployment_command.command() @deployment_command.command()
@click.argument( @click.argument(
"bento", "bento",
...@@ -490,50 +132,6 @@ def build_deployment_command() -> click.Group: ...@@ -490,50 +132,6 @@ def build_deployment_command() -> click.Group:
type=click.STRING, type=click.STRING,
help="Deployment name", help="Deployment name",
) )
@click.option(
"--cluster",
type=click.STRING,
help="Name of the cluster",
)
@click.option(
"--access-authorization",
type=click.BOOL,
help="Enable access authorization",
)
@click.option(
"--scaling-min",
type=click.INT,
help="Minimum scaling value",
)
@click.option(
"--scaling-max",
type=click.INT,
help="Maximum scaling value",
)
@click.option(
"--instance-type",
type=click.STRING,
help="Type of instance",
)
@click.option(
"--strategy",
type=click.Choice(
[deployment_strategy.value for deployment_strategy in DeploymentStrategy]
),
help="Deployment strategy",
)
@click.option(
"--env",
type=click.STRING,
help="List of environment variables pass by --env key[=value] --env ...",
multiple=True,
)
@click.option(
"--secret",
type=click.STRING,
help="List of secret names pass by --secret name1, --secret name2, ...",
multiple=True,
)
@click.option( @click.option(
"-f", "-f",
"--config-file", "--config-file",
...@@ -541,12 +139,6 @@ def build_deployment_command() -> click.Group: ...@@ -541,12 +139,6 @@ def build_deployment_command() -> click.Group:
help="Configuration file path", help="Configuration file path",
default=None, default=None,
) )
@click.option(
"--config-dict",
type=click.STRING,
help="Configuration json string",
default=None,
)
@click.option( @click.option(
"--wait/--no-wait", "--wait/--no-wait",
type=click.BOOL, type=click.BOOL,
...@@ -560,19 +152,12 @@ def build_deployment_command() -> click.Group: ...@@ -560,19 +152,12 @@ def build_deployment_command() -> click.Group:
default=3600, default=3600,
help="Timeout for deployment to be ready in seconds", help="Timeout for deployment to be ready in seconds",
) )
@click.pass_context
def create( def create(
ctx: click.Context,
bento: str | None, bento: str | None,
name: str | None, name: str | None,
cluster: str | None,
access_authorization: bool | None,
scaling_min: int | None,
scaling_max: int | None,
instance_type: str | None,
strategy: str | None,
env: tuple[str] | None,
secret: tuple[str] | None,
config_file: str | t.TextIO | None, config_file: str | t.TextIO | None,
config_dict: str | None,
wait: bool, wait: bool,
timeout: int, timeout: int,
) -> None: ) -> None:
...@@ -584,182 +169,12 @@ def build_deployment_command() -> click.Group: ...@@ -584,182 +169,12 @@ def build_deployment_command() -> click.Group:
create_deployment( create_deployment(
bento=bento, bento=bento,
name=name, name=name,
cluster=cluster,
access_authorization=access_authorization,
scaling_min=scaling_min,
scaling_max=scaling_max,
instance_type=instance_type,
strategy=strategy,
env=env,
secret=secret,
config_file=config_file, config_file=config_file,
config_dict=config_dict,
wait=wait, wait=wait,
timeout=timeout, timeout=timeout,
args=ctx.args,
) )
@deployment_command.command()
@shared_decorator
@click.argument(
    "name",
    type=click.STRING,
    required=True,
)
@output_option
def get(  # type: ignore
    name: str,
    cluster: str | None,
    output: t.Literal["json", "default"],
) -> None:
    """Get a deployment on BentoCloud."""
    # Fetch the deployment and render it in the caller's requested format.
    deployment = bentoml.deployment.get(name, cluster=cluster)
    payload = deployment.to_dict()
    if output == "json":
        rich.print_json(json.dumps(payload, indent=2, default=str))
    else:
        # Default view: pretty-printed YAML with syntax highlighting.
        rendered = yaml.dump(payload, indent=2, sort_keys=False)
        rich.print(Syntax(rendered, "yaml", background_color="default"))
@deployment_command.command()
@shared_decorator
@click.argument(
    "name",
    type=click.STRING,
    required=True,
)
@click.option(
    "--wait", is_flag=True, help="Wait for the deployment to be terminated"
)
def terminate(name: str, cluster: str | None, wait: bool) -> None:  # type: ignore
    """Terminate a deployment on BentoCloud."""
    # Stop the deployment (optionally blocking until termination completes),
    # then confirm to the user.
    bentoml.deployment.terminate(name, cluster=cluster, wait=wait)
    done_msg = f"Deployment [green]'{name}'[/] terminated successfully."
    rich.print(done_msg)
@deployment_command.command()
@click.argument(
    "name",
    type=click.STRING,
    required=True,
)
@shared_decorator
def delete(name: str, cluster: str | None) -> None:  # type: ignore
    """Delete a deployment on BentoCloud."""
    # Remove the deployment, then confirm to the user.
    bentoml.deployment.delete(name, cluster=cluster)
    success_msg = f"Deployment [green]'{name}'[/] deleted successfully."
    rich.print(success_msg)
@deployment_command.command(name="list")
@click.option(
    "--cluster", type=click.STRING, default=None, help="Name of the cluster."
)
@click.option(
    "--search", type=click.STRING, default=None, help="Search for list request."
)
@click.option(
    "-o",
    "--output",
    help="Display the output of this command.",
    type=click.Choice(["json", "yaml", "table"]),
    default="table",
)
@click.option(
    "--label",
    "labels",
    type=click.STRING,
    multiple=True,
    default=None,
    help="Filter deployments by label(s).",
    metavar="KEY=VALUE",
)
def list_command(  # type: ignore
    cluster: str | None,
    search: str | None,
    labels: tuple[str, ...] | None,
    output: t.Literal["json", "yaml", "table"],
) -> None:
    """List existing deployments on BentoCloud.

    Deployments can be filtered by cluster, a free-text search, and one or
    more ``KEY=VALUE`` labels; results render as a table, JSON, or YAML.
    """
    # FIX: ``labels_query`` was previously assigned only inside the ``if``
    # branch, so the ``list`` call below raised NameError whenever ``labels``
    # was None. Default it to None (no label filter).
    labels_query = None
    if labels is not None:
        # For labels like ["env=prod", "team=infra"]
        # This will output: "label:env=prod label:team=infra"
        labels_query = " ".join(f"label:{label}" for label in labels)
    try:
        d_list = bentoml.deployment.list(
            cluster=cluster, search=search, q=labels_query
        )
    except BentoMLException as e:
        raise_deployment_config_error(e, "list")
    res: list[dict[str, t.Any]] = [d.to_dict() for d in d_list]
    if output == "table":
        table = Table(box=None, expand=True)
        table.add_column("Deployment", overflow="fold")
        table.add_column("created_at", overflow="fold")
        table.add_column("Bento", overflow="fold")
        table.add_column("Status", overflow="fold")
        table.add_column("Region", overflow="fold")
        for info in d_list:
            table.add_row(
                info.name,
                info.created_at,
                info.get_bento(refetch=False),
                info.get_status(refetch=False).status,
                info.cluster,
            )
        rich.print(table)
    elif output == "json":
        info = json.dumps(res, indent=2, default=str)
        rich.print_json(info)
    else:
        info = yaml.dump(res, indent=2, sort_keys=False)
        rich.print(Syntax(info, "yaml", background_color="default"))
@deployment_command.command()
@click.option(
    "--cluster", type=click.STRING, default=None, help="Name of the cluster."
)
@click.option(
    "-o",
    "--output",
    help="Display the output of this command.",
    type=click.Choice(["json", "yaml", "table"]),
    default="table",
)
@inject
def list_instance_types(  # type: ignore
    cluster: str | None,
    output: t.Literal["json", "yaml", "table"],
    _cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> None:
    """List existing instance types in cluster on BentoCloud."""
    # Query the cloud client; configuration errors are surfaced through the
    # shared error helper.
    try:
        instance_types = _cloud_client.deployment.list_instance_types(cluster=cluster)
    except BentoMLException as e:
        raise_deployment_config_error(e, "list_instance_types")
    rows: list[dict[str, t.Any]] = [item.to_dict() for item in instance_types]
    if output == "table":
        table = Table(box=None, expand=True)
        for column in ("Name", "Price", "CPU", "Memory", "GPU", "GPU Type"):
            table.add_column(column, overflow="fold")
        for item in instance_types:
            table.add_row(
                item.name,
                item.price,
                item.cpu,
                item.memory,
                item.gpu,
                item.gpu_type,
            )
        rich.print(table)
    elif output == "json":
        rich.print_json(json.dumps(rows, indent=2, default=str))
    else:
        dumped = yaml.dump(rows, indent=2, sort_keys=False)
        rich.print(Syntax(dumped, "yaml", background_color="default"))
return deployment_command return deployment_command
...@@ -770,38 +185,27 @@ deployment_command = build_deployment_command() ...@@ -770,38 +185,27 @@ deployment_command = build_deployment_command()
def create_deployment( def create_deployment(
bento: str | None = None, bento: str | None = None,
name: str | None = None, name: str | None = None,
cluster: str | None = None,
access_authorization: bool | None = None,
scaling_min: int | None = None,
scaling_max: int | None = None,
instance_type: str | None = None,
strategy: str | None = None,
env: tuple[str] | None = None,
secret: tuple[str] | None = None,
config_file: str | t.TextIO | None = None, config_file: str | t.TextIO | None = None,
config_dict: str | None = None,
wait: bool = True, wait: bool = True,
timeout: int = 3600, timeout: int = 3600,
dev: bool = False, dev: bool = False,
args: list[str] | None = None,
_cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client], _cloud_client: BentoCloudClient = Provide[BentoMLContainer.bentocloud_client],
) -> Deployment: ) -> Deployment:
cfg_dict = None # Load config from file and serialize to env
if config_dict is not None and config_dict != "": service_configs = resolve_service_config(config_file=config_file, args=args)
cfg_dict = json.loads(config_dict) print(f"service_configs: {service_configs}")
env_dicts = []
if service_configs:
config_json = json.dumps(service_configs)
logger.info(f"Deployment service configuration: {config_json}")
env_dicts.append({"name": "DYN_DEPLOYMENT_CONFIG", "value": config_json})
config_params = DeploymentConfigParameters( config_params = DeploymentConfigParameters(
name=name, name=name,
bento=bento, bento=bento,
cluster=cluster, envs=env_dicts,
access_authorization=access_authorization, secrets=None,
scaling_max=scaling_max,
scaling_min=scaling_min,
instance_type=instance_type,
strategy=strategy,
envs=convert_env_to_dict(env),
secrets=list(secret) if secret is not None else None,
config_file=config_file,
config_dict=cfg_dict,
cli=True, cli=True,
dev=dev, dev=dev,
) )
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
from __future__ import annotations from __future__ import annotations
import collections
import json import json
import logging import logging
import os import os
...@@ -27,7 +26,8 @@ from typing import Optional ...@@ -27,7 +26,8 @@ from typing import Optional
import click import click
import rich import rich
import yaml
from .utils import resolve_service_config
if t.TYPE_CHECKING: if t.TYPE_CHECKING:
P = t.ParamSpec("P") # type: ignore P = t.ParamSpec("P") # type: ignore
...@@ -36,96 +36,6 @@ if t.TYPE_CHECKING: ...@@ -36,96 +36,6 @@ if t.TYPE_CHECKING:
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _parse_service_arg(arg_name: str, arg_value: str) -> tuple[str, str, t.Any]:
"""Parse a single CLI argument into service name, key, and value."""
parts = arg_name.split(".")
service = parts[0]
nested_keys = parts[1:]
# Special case: if this is a ServiceArgs.envs.* path, keep value as string
if (
len(nested_keys) >= 2
and nested_keys[0] == "ServiceArgs"
and nested_keys[1] == "envs"
):
value: t.Union[str, int, float, bool, dict, list] = arg_value
else:
# Parse value based on type for non-env vars
try:
value = json.loads(arg_value)
except json.JSONDecodeError:
if arg_value.isdigit():
value = int(arg_value)
elif arg_value.replace(".", "", 1).isdigit() and arg_value.count(".") <= 1:
value = float(arg_value)
elif arg_value.lower() in ("true", "false"):
value = arg_value.lower() == "true"
else:
value = arg_value
# Build nested dict structure
result = value
for key in reversed(nested_keys[1:]):
result = {key: result}
return service, nested_keys[0], result
def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
service_configs: t.DefaultDict[str, t.Dict[str, t.Any]] = collections.defaultdict(
dict
)
def deep_update(d: dict, key: str, value: t.Any):
"""
Recursively updates nested dictionaries. We use this to process arguments like
---Worker.ServiceArgs.env.CUDA_VISIBLE_DEVICES="0,1"
The _parse_service_arg function will parse this into:
service = "Worker"
nested_keys = ["ServiceArgs", "envs", "CUDA_VISIBLE_DEVICES"]
And returns returns: ("VllmWorker", "ServiceArgs", {"envs": {"CUDA_VISIBLE_DEVICES": "0,1"}})
We then use deep_update to update the service_configs dictionary with this nested value.
"""
if isinstance(value, dict) and key in d and isinstance(d[key], dict):
for k, v in value.items():
deep_update(d[key], k, v)
else:
d[key] = value
index = 0
while index < len(args):
next_arg = args[index]
if not (next_arg.startswith("--") or "." not in next_arg):
continue
try:
if "=" in next_arg:
arg_name, arg_value = next_arg.split("=", 1)
index += 1
elif args[index + 1] == "=":
arg_name = next_arg
arg_value = args[index + 2]
index += 3
else:
arg_name = next_arg
arg_value = args[index + 1]
index += 2
if arg_value.startswith("-"):
raise ValueError("Service arg value can not start with -")
arg_name = arg_name[2:]
service, key, value = _parse_service_arg(arg_name, arg_value)
deep_update(service_configs[service], key, value)
except Exception:
raise ValueError(f"Error parsing service arg: {args[index]}")
return service_configs
def build_serve_command() -> click.Group: def build_serve_command() -> click.Group:
from dynamo.sdk.lib.logging import configure_server_logging from dynamo.sdk.lib.logging import configure_server_logging
...@@ -215,27 +125,8 @@ def build_serve_command() -> click.Group: ...@@ -215,27 +125,8 @@ def build_serve_command() -> click.Group:
from dynamo.sdk.lib.service import LinkedServices from dynamo.sdk.lib.service import LinkedServices
service_configs: dict[str, dict[str, t.Any]] = {} # Resolve service configs from yaml file, command line args into a python dict
service_configs = resolve_service_config(file, ctx.args)
# Load file if provided
if file:
with open(file) as f:
yaml_configs = yaml.safe_load(f)
# Initialize service_configs as empty dict if it's None
# Convert nested YAML structure to flat dict with dot notation
for service, configs in yaml_configs.items():
if service not in service_configs:
service_configs[service] = {}
for key, value in configs.items():
service_configs[service][key] = value
# Process service-specific options
cmdline_overrides: t.Dict[str, t.Any] = _parse_service_args(ctx.args)
for service, configs in cmdline_overrides.items():
if service not in service_configs:
service_configs[service] = {}
for key, value in configs.items():
service_configs[service][key] = value
# Process depends # Process depends
if depends: if depends:
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
# Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES # Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
import collections
import contextlib import contextlib
import json import json
import logging import logging
...@@ -26,8 +27,13 @@ import typing as t ...@@ -26,8 +27,13 @@ import typing as t
import click import click
import psutil import psutil
import yaml
from click import Command, Context from click import Command, Context
from dynamo.sdk.lib.logging import configure_server_logging
configure_server_logging()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DYN_LOCAL_STATE_DIR = "DYN_LOCAL_STATE_DIR" DYN_LOCAL_STATE_DIR = "DYN_LOCAL_STATE_DIR"
...@@ -182,3 +188,150 @@ def save_dynamo_state( ...@@ -182,3 +188,150 @@ def save_dynamo_state(
json.dump(state, f) json.dump(state, f)
logger.warning(f"Saved state to {state_file}") logger.warning(f"Saved state to {state_file}")
def _parse_service_arg(arg_name: str, arg_value: str) -> tuple[str, str, t.Any]:
"""Parse a single CLI argument into service name, key, and value."""
parts = arg_name.split(".")
service = parts[0]
nested_keys = parts[1:]
# Special case: if this is a ServiceArgs.envs.* path, keep value as string
if (
len(nested_keys) >= 2
and nested_keys[0] == "ServiceArgs"
and nested_keys[1] == "envs"
):
value: t.Union[str, int, float, bool, dict, list] = arg_value
else:
# Parse value based on type for non-env vars
try:
value = json.loads(arg_value)
except json.JSONDecodeError:
if arg_value.isdigit():
value = int(arg_value)
elif arg_value.replace(".", "", 1).isdigit() and arg_value.count(".") <= 1:
value = float(arg_value)
elif arg_value.lower() in ("true", "false"):
value = arg_value.lower() == "true"
else:
value = arg_value
# Build nested dict structure
result = value
for key in reversed(nested_keys[1:]):
result = {key: result}
return service, nested_keys[0], result
def _parse_service_args(args: list[str]) -> t.Dict[str, t.Any]:
service_configs: t.DefaultDict[str, t.Dict[str, t.Any]] = collections.defaultdict(
dict
)
def deep_update(d: dict, key: str, value: t.Any):
"""
Recursively updates nested dictionaries. We use this to process arguments like
---Worker.ServiceArgs.env.CUDA_VISIBLE_DEVICES="0,1"
The _parse_service_arg function will parse this into:
service = "Worker"
nested_keys = ["ServiceArgs", "envs", "CUDA_VISIBLE_DEVICES"]
And returns: ("VllmWorker", "ServiceArgs", {"envs": {"CUDA_VISIBLE_DEVICES": "0,1"}})
We then use deep_update to update the service_configs dictionary with this nested value.
"""
if isinstance(value, dict) and key in d and isinstance(d[key], dict):
for k, v in value.items():
deep_update(d[key], k, v)
else:
d[key] = value
index = 0
while index < len(args):
next_arg = args[index]
if not (next_arg.startswith("--") or "." not in next_arg):
continue
try:
if "=" in next_arg:
arg_name, arg_value = next_arg.split("=", 1)
index += 1
elif args[index + 1] == "=":
arg_name = next_arg
arg_value = args[index + 2]
index += 3
else:
arg_name = next_arg
arg_value = args[index + 1]
index += 2
if arg_value.startswith("-"):
raise ValueError("Service arg value can not start with -")
arg_name = arg_name[2:]
service, key, value = _parse_service_arg(arg_name, arg_value)
deep_update(service_configs[service], key, value)
except Exception:
raise ValueError(f"Error parsing service arg: {args[index]}")
return service_configs
def resolve_service_config(
    config_file: str | t.TextIO | None = None,
    args: list[str] | None = None,
) -> dict[str, dict[str, t.Any]]:
    """Resolve service configuration from file and command line arguments.

    Precedence: when the DYN_DEPLOYMENT_CONFIG environment variable is set,
    it is used alone and both the file and args are ignored. Otherwise the
    YAML file (if any) is loaded first and command line overrides are
    applied on top of it.

    Args:
        config_file: Path to YAML config file or an already-open file object
        args: List of command line arguments (``--Service.key=value`` style)

    Returns:
        Dictionary mapping service names to their configurations
    """
    service_configs: dict[str, dict[str, t.Any]] = {}

    # Check for deployment config first
    if "DYN_DEPLOYMENT_CONFIG" in os.environ:
        try:
            deployment_config = yaml.safe_load(os.environ["DYN_DEPLOYMENT_CONFIG"])
            # FIX: an empty/blank env var parses to None; previously that None
            # was returned as the "config dict". Keep the empty dict instead.
            if deployment_config:
                service_configs = deployment_config
            logger.info("Successfully loaded deployment config: %s", service_configs)
            logger.warning(
                "DYN_DEPLOYMENT_CONFIG found in environment - ignoring configuration file and command line arguments"
            )
        except Exception as e:
            logger.warning("Failed to parse DYN_DEPLOYMENT_CONFIG: %s", e)
    else:
        # Load file if provided (accepts either a path or an open file object).
        if config_file:
            with open(config_file, encoding="utf-8") if isinstance(
                config_file, str
            ) else contextlib.nullcontext(config_file) as f:
                yaml_configs = yaml.safe_load(f)
            logger.debug("Loaded config from file: %s", yaml_configs)
            # FIX: an empty YAML file parses to None; previously .items()
            # raised AttributeError. Treat it as "no configuration".
            if yaml_configs:
                # Convert nested YAML structure to per-service dicts.
                for service, configs in yaml_configs.items():
                    service_configs.setdefault(service, {}).update(configs)

        # Apply service-specific command line overrides on top of the file.
        if args:
            cmdline_overrides = _parse_service_args(args)
            logger.debug("Applying command line overrides: %s", cmdline_overrides)
            for service, configs in cmdline_overrides.items():
                service_configs.setdefault(service, {}).update(configs)

    logger.debug("Final resolved config: %s", service_configs)
    return service_configs
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Frontend:
message: "earth"
port: 8000
ServiceArgs:
workers: 1
resources:
cpu: "1"
Middle:
message: "moon"
ServiceArgs:
workers: 1
resources:
cpu: "1"
Backend:
message: "mars"
ServiceArgs:
workers: 2
resources:
cpu: "1"
...@@ -18,7 +18,11 @@ import logging ...@@ -18,7 +18,11 @@ import logging
from pydantic import BaseModel from pydantic import BaseModel
from dynamo.sdk import DYNAMO_IMAGE, api, depends, dynamo_endpoint, service from dynamo.sdk import DYNAMO_IMAGE, api, depends, dynamo_endpoint, service
from dynamo.sdk.lib.config import ServiceConfig
from dynamo.sdk.lib.logging import configure_server_logging
# Configure logging
configure_server_logging()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
""" """
...@@ -61,13 +65,16 @@ class ResponseType(BaseModel): ...@@ -61,13 +65,16 @@ class ResponseType(BaseModel):
class Backend: class Backend:
def __init__(self) -> None: def __init__(self) -> None:
logger.info("Starting backend") logger.info("Starting backend")
config = ServiceConfig.get_instance()
self.message = config.get("Backend", {}).get("message", "back")
logger.info(f"Backend config message: {self.message}")
@dynamo_endpoint() @dynamo_endpoint()
async def generate(self, req: RequestType): async def generate(self, req: RequestType):
"""Generate tokens.""" """Generate tokens."""
req_text = req.text req_text = req.text
logger.info(f"Backend received: {req_text}") logger.info(f"Backend received: {req_text}")
text = f"{req_text}-back" text = f"{req_text}-{self.message}"
for token in text.split(): for token in text.split():
yield f"Backend: {token}" yield f"Backend: {token}"
...@@ -81,13 +88,16 @@ class Middle: ...@@ -81,13 +88,16 @@ class Middle:
def __init__(self) -> None: def __init__(self) -> None:
logger.info("Starting middle") logger.info("Starting middle")
config = ServiceConfig.get_instance()
self.message = config.get("Middle", {}).get("message", "mid")
logger.info(f"Middle config message: {self.message}")
@dynamo_endpoint() @dynamo_endpoint()
async def generate(self, req: RequestType): async def generate(self, req: RequestType):
"""Forward requests to backend.""" """Forward requests to backend."""
req_text = req.text req_text = req.text
logger.info(f"Middle received: {req_text}") logger.info(f"Middle received: {req_text}")
text = f"{req_text}-mid" text = f"{req_text}-{self.message}"
next_request = RequestType(text=text).model_dump_json() next_request = RequestType(text=text).model_dump_json()
async for response in self.backend.generate(next_request): async for response in self.backend.generate(next_request):
logger.info(f"Middle received response: {response}") logger.info(f"Middle received response: {response}")
...@@ -101,14 +111,19 @@ class Frontend: ...@@ -101,14 +111,19 @@ class Frontend:
middle = depends(Middle) middle = depends(Middle)
def __init__(self) -> None: def __init__(self) -> None:
print("Starting frontend") logger.info("Starting frontend")
config = ServiceConfig.get_instance()
self.message = config.get("Frontend", {}).get("message", "front")
self.port = config.get("Frontend", {}).get("port", 8000)
logger.info(f"Frontend config message: {self.message}")
logger.info(f"Frontend config port: {self.port}")
@api @api
async def generate(self, text): async def generate(self, text):
"""Stream results from the pipeline.""" """Stream results from the pipeline."""
print(f"Frontend received: {text}") logger.info(f"Frontend received: {text}")
print(f"Frontend received type: {type(text)}") logger.info(f"Frontend received type: {type(text)}")
txt = RequestType(text=text) txt = RequestType(text=text)
print(f"Frontend sending: {type(txt)}") logger.info(f"Frontend sending: {type(txt)}")
async for response in self.middle.generate(txt.model_dump_json()): async for response in self.middle.generate(txt.model_dump_json()):
yield f"Frontend: {response}" yield f"Frontend: {response}"
...@@ -30,6 +30,7 @@ dependencies = [ ...@@ -30,6 +30,7 @@ dependencies = [
"types-psutil==7.0.0.20250218", "types-psutil==7.0.0.20250218",
"kubernetes==32.0.1", "kubernetes==32.0.1",
"ai-dynamo-runtime==0.1.1", "ai-dynamo-runtime==0.1.1",
"distro",
] ]
classifiers = [ classifiers = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment