"git@developer.sourcefind.cn:OpenDAS/openpcdet.git" did not exist on "093efb9a81af77aac0f396b157f11cd5a197fa74"
Unverified Commit 403344e5 authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

refactor: refactor dynamo deploy subfolder (#927)

parent 99cd9d85
......@@ -53,10 +53,10 @@ cargo build --locked --profile dev --features mistralrs,sglang,vllm,python
cargo doc --no-deps
# create symlinks for the binaries in the deploy directory
mkdir -p $HOME/dynamo/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
ln -sf $HOME/dynamo/.build/target/debug/dynamo-run $HOME/dynamo/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/dynamo-run
ln -sf $HOME/dynamo/.build/target/debug/http $HOME/dynamo/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/http
ln -sf $HOME/dynamo/.build/target/debug/llmctl $HOME/dynamo/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/llmctl
mkdir -p $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin
ln -sf $HOME/dynamo/.build/target/debug/dynamo-run $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run
ln -sf $HOME/dynamo/.build/target/debug/http $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/http
ln -sf $HOME/dynamo/.build/target/debug/llmctl $HOME/dynamo/deploy/sdk/src/dynamo/sdk/cli/bin/llmctl
# install the python bindings in editable mode
cd $HOME/dynamo/lib/bindings/python && retry uv pip install -e .
......
......@@ -90,4 +90,4 @@ TensorRT-LLM
# Local build artifacts for devcontainer
.build/
# Copied binaries to ignore
deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
\ No newline at end of file
deploy/sdk/src/dynamo/sdk/cli/bin
\ No newline at end of file
......@@ -13,9 +13,6 @@ Cargo.toml @ryanolson @grahamking @paulhendricks @biswapanda @tmonty12 @guanluo
# Container/Environments
/container/ @rmccorm4 @tanmayv25 @ptarasiewiczNV @ishandhanani @alec-flowers @nnshah1 @ai-dynamo/Devops
# Kubernetes
/deploy/Kubernetes/ @whoisj @hutm @biswapanda @mohammedabdulwahhab
# Dynamo deploy
/deploy/ @hutm @biswapanda @ishandhanani @julienmancuso @hhzhang16 @nnshah1 @mohammedabdulwahhab
......
......@@ -26,7 +26,7 @@ golang-base:
operator-src:
FROM +golang-base
COPY ./deploy/dynamo/operator /artifacts/operator
COPY ./deploy/cloud/operator /artifacts/operator
SAVE ARTIFACT /artifacts/operator
......@@ -116,13 +116,13 @@ dynamo-build:
cargo doc --no-deps
# Create symlinks for wheel building
RUN mkdir -p /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/ && \
RUN mkdir -p /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/ && \
# Remove existing symlinks
rm -f /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/* && \
rm -f /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/* && \
# Create new symlinks pointing to the correct location
ln -sf /workspace/target/release/dynamo-run /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/dynamo-run && \
ln -sf /workspace/target/release/http /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/http && \
ln -sf /workspace/target/release/llmctl /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin/llmctl
ln -sf /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/dynamo-run && \
ln -sf /workspace/target/release/http /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/http && \
ln -sf /workspace/target/release/llmctl /workspace/deploy/sdk/src/dynamo/sdk/cli/bin/llmctl
RUN cd /workspace/lib/bindings/python && \
......@@ -174,16 +174,16 @@ dynamo-base-docker:
############### ALL TARGETS ##############################
all-test:
BUILD ./deploy/dynamo/operator+test
BUILD ./deploy/cloud/operator+test
all-docker:
ARG DOCKER_SERVER=my-registry
ARG IMAGE_TAG=latest
BUILD ./deploy/dynamo/operator+docker --DOCKER_SERVER=$DOCKER_SERVER --IMAGE_TAG=$IMAGE_TAG
BUILD ./deploy/dynamo/api-store+docker --DOCKER_SERVER=$DOCKER_SERVER --IMAGE_TAG=$IMAGE_TAG
BUILD ./deploy/cloud/operator+docker --DOCKER_SERVER=$DOCKER_SERVER --IMAGE_TAG=$IMAGE_TAG
BUILD ./deploy/cloud/api-store+docker --DOCKER_SERVER=$DOCKER_SERVER --IMAGE_TAG=$IMAGE_TAG
all-lint:
BUILD ./deploy/dynamo/operator+lint
BUILD ./deploy/cloud/operator+lint
all:
BUILD +all-test
......
......@@ -21,7 +21,7 @@ limitations under the License.
[![GitHub Release](https://img.shields.io/github/v/release/ai-dynamo/dynamo)](https://github.com/ai-dynamo/dynamo/releases/latest)
[![Discord](https://dcbadge.limes.pink/api/server/D92uqZRjCZ?style=flat)](https://discord.gg/nvidia-dynamo)
| **[Roadmap](https://github.com/ai-dynamo/dynamo/issues/762)** | **[Support Matrix](support_matrix.md)** | **[Guides](docs/guides)** | **[Architecture and Features](docs/architecture.md)** | **[APIs](lib/bindings/python/README.md)** | **[SDK](deploy/dynamo/sdk/README.md)** |
| **[Roadmap](https://github.com/ai-dynamo/dynamo/issues/762)** | **[Support Matrix](support_matrix.md)** | **[Guides](docs/guides)** | **[Architecture and Features](docs/architecture.md)** | **[APIs](lib/bindings/python/README.md)** | **[SDK](deploy/sdk/README.md)** |
NVIDIA Dynamo is a high-throughput low-latency inference framework designed for serving generative AI and reasoning models in multi-node distributed environments. Dynamo is designed to be inference engine agnostic (supports TRT-LLM, vLLM, SGLang or others) and captures LLM-specific capabilities such as:
......@@ -112,7 +112,7 @@ example.
First start the Dynamo Distributed Runtime services:
```bash
docker compose -f deploy/docker-compose.yml up -d
docker compose -f deploy/metrics/docker-compose.yml up -d
```
#### Start Dynamo LLM Serving Components
......@@ -151,13 +151,13 @@ Otherwise, to develop locally, we recommend working inside of the container
./container/run.sh -it --mount-workspace
cargo build --release
mkdir -p /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
cp /workspace/target/release/http /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
cp /workspace/target/release/llmctl /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
cp /workspace/target/release/dynamo-run /workspace/deploy/dynamo/sdk/src/dynamo/sdk/cli/bin
mkdir -p /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
cp /workspace/target/release/http /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
cp /workspace/target/release/llmctl /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
cp /workspace/target/release/dynamo-run /workspace/deploy/sdk/src/dynamo/sdk/cli/bin
uv pip install -e .
export PYTHONPATH=$PYTHONPATH:/workspace/deploy/dynamo/sdk/src:/workspace/components/planner/src
export PYTHONPATH=$PYTHONPATH:/workspace/deploy/sdk/src:/workspace/components/planner/src
```
......@@ -180,7 +180,7 @@ cd ../../../
pip install .[all]
# To test
docker compose -f deploy/docker-compose.yml up -d
docker compose -f deploy/metrics/docker-compose.yml up -d
cd examples/llm
dynamo serve graphs.agg:Frontend -f configs/agg.yaml
```
......@@ -90,7 +90,7 @@ To visualize the metrics being exposed on the Prometheus endpoint,
see the Prometheus and Grafana configurations in
[deploy/metrics](../../deploy/metrics):
```bash
docker compose -f deploy/docker-compose.yml --profile metrics up -d
docker compose -f deploy/metrics/docker-compose.yml --profile metrics up -d
```
## Metrics Collection Modes
......@@ -167,7 +167,6 @@ To view the metrics hosted on the PushGateway:
# curl http://<pushgateway_ip>:<pushgateway_port>/metrics
curl 127.0.0.1:9091/metrics
```
## Building/Running from Source
For easy iteration while making edits to the metrics component, you can use `cargo run`
......@@ -177,3 +176,4 @@ to build and run with your local changes:
cargo run --bin metrics -- --component my_component --endpoint my_endpoint
```
......@@ -170,7 +170,7 @@ COPY hatch_build.py /workspace/
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
COPY deploy/sdk /workspace/deploy/sdk
# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python \
......
......@@ -302,7 +302,7 @@ COPY hatch_build.py /workspace/
COPY lib/ /workspace/lib/
COPY components /workspace/components
COPY launch /workspace/launch
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
COPY deploy/sdk /workspace/deploy/sdk
# Build Rust crate binaries packaged with the wheel
RUN cargo build --release --locked --features mistralrs,sglang,vllm,python \
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Fail fast on uninitialized variables and missing properties in everything below.
set-strictmode -version latest
# Session-wide state shared by the helper functions in this file.
# The `_is_debug`, `_git_branch`, `_local_srcdir`, `_repository_root`, and
# `_verbosity` entries start as $null and are filled in on first use by the
# matching `get_*` function.
$global:_init_path = "${env:PWD}"
$global:_is_debug = $null
$global:_git_branch = $null
$global:_local = $null
$global:_local_srcdir = $null
$global:_repository_root = $null
$global:_verbosity = $null
# Foreground colors used by the `write-*` output helpers.
$global:colors = @{
error = 'Red'
high = 'Cyan'
low = 'DarkGray'
medium = 'DarkBlue'
test = @{
failed = 'Red'
passed = 'Green'
}
title = 'Blue'
warning = 'Yellow'
}
# Restores the session once a script is done: clears the NVBUILD_* environment
# overrides and switches debug output back off.
function cleanup_after {
    write-debug "<cleanup_after>"
    reset_environment
    $global:DebugPreference = 'SilentlyContinue'
}
# Ensures that a directory exists beneath the local source directory.
#   path     - directory path, resolved through `to_local_path`.
#   recreate - when set and the directory already exists, it is removed
#              (recursively) and created again, leaving it empty.
function create_directory([string] $path, [switch] $recreate) {
    write-debug "<create_directory> path = '${path}'."
    write-debug "<create_directory> recreate = ${recreate}"

    $path_local = $(to_local_path $path)
    write-debug "<create_directory> path_local = '${path_local}'."

    $exists = test-path $path_local -pathType Container

    # Nothing to do when the directory is already there and may be kept.
    if ($exists -and (-not $recreate)) {
        return
    }

    if ($exists) {
        remove-item $path_local -Recurse | out-null
    }

    new-item $path_local -itemtype Directory | out-null
}
# Fallback for the debug-trace setting when nothing else specifies it: disabled.
function default_is_debug {
    $fallback = $false
    write-debug "<default_is_debug> -> ${fallback}."
    return $fallback
}
# Fallback for the git branch: the currently checked-out branch when `git` is
# available, otherwise 'main'.
function default_git_branch {
    $branch = 'main'
    if (is_installed 'git') {
        $branch = "$(git branch --show-current)"
    }

    write-debug "<default_git_branch> -> '${branch}'."
    return $branch
}
# Fallback for the local source directory: the top-level directory reported by
# `git rev-parse --show-toplevel`.
function default_local_srcdir {
    $toplevel = & git rev-parse --show-toplevel
    write-debug "<default_local_srcdir> -> '${toplevel}'."
    return $toplevel
}
# Fallback for the verbosity level when nothing else specifies it: 'NORMAL'.
function default_verbosity {
    $fallback = 'NORMAL'
    write-debug "<default_verbosity> -> '${fallback}'."
    return $fallback
}
# Reads the debug-trace setting from NVBUILD_DEBUG_TRACE.
# 'true', '1' and 'yes' map to $true; 'false', '0' and 'no' map to $false
# (compared case-insensitively). Any other content is returned unchanged, and
# $null is returned when the variable is not set.
function env_get_is_debug {
    $setting = $env:NVBUILD_DEBUG_TRACE

    if (@('true', '1', 'yes') -contains $setting) {
        $setting = $true
    }
    elseif (@('false', '0', 'no') -contains $setting) {
        $setting = $false
    }

    # setting can be $null, $true, or $false
    write-debug "<env_get_is_debug> -> $(value_or_default $setting '<null>')."
    return $setting
}
# Reads the git branch override from NVBUILD_GIT_BRANCH ($null when not set).
function env_get_git_branch {
    $branch = $env:NVBUILD_GIT_BRANCH
    write-debug "<env_get_git_branch> -> '${branch}'."
    return $branch
}
# Reads the local source directory override from NVBUILD_LOCAL_SRCDIR
# ($null when not set).
function env_get_local_srcdir {
    $srcdir = $env:NVBUILD_LOCAL_SRCDIR
    write-debug "<env_get_local_srcdir> -> '${srcdir}'."
    return $srcdir
}
# Reads the verbosity override from NVBUILD_VERBOSITY ($null when not set).
function env_get_verbosity {
    $level = $env:NVBUILD_VERBOSITY
    write-debug "<env_get_verbosity> -> '${level}'."
    return $level
}
# Publishes the git branch through NVBUILD_GIT_BRANCH.
# Does nothing when NVBUILD_NOSET is defined.
function env_set_git_branch([string] $value) {
    if ($null -ne $env:NVBUILD_NOSET) {
        return
    }

    write-debug "<env_set_git_branch> value: '${value}'."
    $env:NVBUILD_GIT_BRANCH = $value
}
# Publishes the local source directory through NVBUILD_LOCAL_SRCDIR.
# Does nothing when NVBUILD_NOSET is defined.
function env_set_local_srcdir([string] $value) {
    if ($null -ne $env:NVBUILD_NOSET) {
        return
    }

    write-debug "<env_set_local_srcdir> value: '${value}'."
    $env:NVBUILD_LOCAL_SRCDIR = $value
}
# Publishes the verbosity level through NVBUILD_VERBOSITY.
# Does nothing when NVBUILD_NOSET is defined.
function env_set_verbosity([string] $value) {
    if ($null -ne $env:NVBUILD_NOSET) {
        return
    }

    write-debug "<env_set_verbosity> value: '${value}'."
    $env:NVBUILD_VERBOSITY = $value
}
# Reports a fatal error, cleans up the session, and terminates with exit code 1.
function fatal_exit([string] $message) {
    $text = "fatal: ${message}"
    write-error $text
    cleanup_after
    exit 1
}
# Returns the debug-trace setting, resolving it on first use from the
# environment and falling back to the default when the environment has none.
function get_is_debug {
    if ($null -eq $global:_is_debug) {
        $from_env = env_get_is_debug
        if ($null -eq $from_env) {
            set_is_debug $(default_is_debug)
        }
        else {
            set_is_debug $from_env
        }
    }

    write-debug "<get_is_debug> -> ${global:_is_debug}."
    return $global:_is_debug
}
# Returns the git branch, resolving it on first use from NVBUILD_GIT_BRANCH
# and falling back to `default_git_branch` when the variable is not set.
function get_git_branch {
    if ($null -eq $global:_git_branch) {
        # Read (not write) the environment override. Calling the setter here
        # would clear NVBUILD_GIT_BRANCH and always fall through to the default.
        $value = $(env_get_git_branch)
        if ($null -ne $value) {
            set_git_branch $value
        }
        else {
            set_git_branch $(default_git_branch)
        }
    }

    write-debug "<get_git_branch> -> '${global:_git_branch}'."
    return $global:_git_branch
}
# Returns the local source directory, resolving it on first use from the
# environment and falling back to the default when the environment has none.
function get_local_srcdir {
    if ($null -eq $global:_local_srcdir) {
        $from_env = env_get_local_srcdir
        if ($null -eq $from_env) {
            set_local_srcdir $(default_local_srcdir)
        }
        else {
            set_local_srcdir $from_env
        }
    }

    write-debug "<get_local_srcdir> -> '${global:_local_srcdir}'."
    return $global:_local_srcdir
}
# Returns the repository's top-level directory as reported by git.
# The value is looked up once and then served from `$global:_repository_root`.
function get_repository_root {
    $is_cached = $null -ne $global:_repository_root
    if (-not $is_cached) {
        $global:_repository_root = & git rev-parse --show-toplevel
    }

    write-debug "<get_repository_root> '${global:_repository_root}'."
    return $global:_repository_root
}
# Returns the verbosity level, resolving it on first use from the environment
# and falling back to the default when the environment has none.
function get_verbosity {
    if ($null -eq $global:_verbosity) {
        $from_env = env_get_verbosity
        if ($null -eq $from_env) {
            set_verbosity $(default_verbosity)
        }
        else {
            set_verbosity $from_env
        }
    }

    write-debug "<get_verbosity> -> '${global:_verbosity}'."
    return $global:_verbosity
}
# True when the value is null, empty, or consists only of whitespace.
function is_empty([string] $value) {
    $blank = [string]::IsNullOrWhiteSpace($value)
    return $blank
}
# Reports whether git ignores the given path.
#   path - absolute path inside the repository, or a path relative to its root.
# Returns $true when `git check-ignore` exits with 0 (path is ignored), and
# $false otherwise (not ignored, or git reported an error).
function is_git_ignored([string] $path) {
    $repo_root = $(get_repository_root)

    # Reduce an absolute path to one relative to the repository root.
    if (starts_with $path $repo_root) {
        $path = $path.substring($(len $repo_root))
    }

    if (starts_with $path '/') {
        $path = $path.substring(1)
    }

    # `git check-ignore` signals its answer through the exit status; its
    # standard output is the matching path, not a status code.
    & git check-ignore --quiet $path | out-null
    return (0 -eq $LASTEXITCODE)
}
# True when `get-command` can find a command with the given name.
function is_installed([string] $command) {
    write-debug "<is_installed> command = '${command}'."

    $found = get-command "${command}" -errorAction SilentlyContinue
    $out = $null -ne $found

    write-debug "<is_installed> -> ${out}."
    return $out
}
# True only when neither standard output nor standard error is redirected.
function is_tty {
    $out_is_console = -not [System.Console]::IsOutputRedirected
    $err_is_console = -not [System.Console]::IsErrorRedirected
    return ($out_is_console -and $err_is_console)
}
# True when the value names one of the supported verbosity levels
# (NORMAL, MINIMAL, DETAILED); the comparison is case-insensitive.
function is_verbosity_valid([string] $value) {
    $levels = @('NORMAL', 'MINIMAL', 'DETAILED')
    return ($levels -contains $value)
}
# Converts a path into its fully-qualified form.
#   path - path to normalize; it may or may not exist yet.
# Returns the result of `resolve-path` when the path exists. For a path that
# does not exist yet (for example a directory about to be created) it returns
# the fully-qualified provider path as a string instead of failing.
function normalize_path([string] $path) {
    write-debug "<normalize-path> path: '${path}'."

    $out = resolve-path "${path}" -errorAction SilentlyContinue
    if ($null -eq $out) {
        # `resolve-path` only succeeds for existing items; this call builds the
        # absolute path without requiring the item to exist.
        $out = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($path)
    }

    write-debug "<normalize-path> '${path}' -> '${out}'."
    return $out
}
# Reads a file located beneath the local source directory.
#   path  - file path, resolved through `to_local_path`; must not be empty.
#   lines - return the content as individual lines.
#   bytes - return the content as a raw byte stream.
# Without a switch the whole file is returned as one string. `lines` and
# `bytes` cannot be combined.
function read_content([string] $path, [switch] $lines, [switch] $bytes) {
    if (is_empty $path) {
        throw 'Argument `path` cannot be `null` or empty.'
    }
    if ($lines -and $bytes) {
        throw 'Arguments `lines` and `bytes` are mutually exclusive.'
    }

    write-debug "<read_content> path: '${path}'."
    write-debug "<read_content> bytes: ${bytes}."
    write-debug "<read_content> lines: ${lines}."

    $path = to_local_path $path

    if ($bytes) {
        return get-content -path $path -asbytestream -raw
    }
    elseif ($lines) {
        return get-content -path $path
    }
    else {
        return get-content -path $path -raw
    }
}
# Removes every environment variable whose name starts with 'NVBUILD_' from
# the current session. These are the variables the build scripts use directly.
function reset_environment {
    write-debug "<reset_environment>"

    # Collect the matches first so the environment is not modified while it is
    # being enumerated.
    $overrides = @(get-childitem env: | where-object { $_.Key.StartsWith('NVBUILD_') })

    foreach ($entry in $overrides) {
        $name = "$($entry.Key)"

        # Remove the variable through the provider instead of evaluating a
        # generated script string built from the variable's name.
        remove-item -literalPath "env:${name}" -errorAction SilentlyContinue

        if ($name -ne 'NVBUILD_NOSET') {
            write-debug "<reset_environment> removed '${name}'."
        }
    }
}
# Executes a command line and terminates the script when it fails.
#   command - command line to execute; must not be empty.
# The command is echoed unless verbosity is MINIMAL. When the command finishes
# with a non-zero `$LASTEXITCODE`, an error is reported and the script exits
# with that same code.
function run([string] $command) {
    # A [string] parameter is never $null (a missing value arrives as ''), so
    # test for empty/blank content instead of comparing against $null.
    if ([string]::IsNullOrWhiteSpace($command)) {
        throw 'Argument `command` cannot be `null` or empty.'
    }

    write-debug "<run> command = '${command}'."

    if ('MINIMAL' -ne $(get_verbosity)) {
        write-high "${command}"
    }

    invoke-expression "${command}" | out-default
    $exit_code = $LASTEXITCODE
    write-debug "<run> exit_code = ${exit_code}."

    if ($exit_code -ne 0) {
        write-error "fatal: Command ""${command}"" failed, returned ${exit_code}." -category fromStdErr
        exit $exit_code
    }
}
# Stores the debug-trace setting and switches `$global:DebugPreference` to
# 'Continue' (enabled) or 'SilentlyContinue' (disabled) accordingly.
function set_is_debug([bool] $value) {
    write-debug "<set_is_debug> value = '${value}'."
    $global:_is_debug = $value

    $preference = 'SilentlyContinue'
    if ($value) {
        $preference = 'Continue'
    }
    $global:DebugPreference = $preference
}
# Stores the git branch for this session and mirrors it to the environment.
function set_git_branch([string] $value) {
    write-debug "<set_git_branch> value = '${value}'."

    $global:_git_branch = $value
    env_set_git_branch $value
}
# Stores the local source directory for this session and mirrors it to the
# environment.
function set_local_srcdir([string] $value) {
    write-debug "<set_local_srcdir> value: '${value}'."

    $global:_local_srcdir = $value
    env_set_local_srcdir $value
}
# Stores the verbosity level for this session and mirrors it to the
# environment. Throws when the value is not a supported level.
function set_verbosity([string] $value) {
    write-debug "<set_verbosity> '${value}'."

    $is_valid = is_verbosity_valid $value
    if (-not $is_valid) {
        throw "Invalid verbosity value '${value}'."
    }

    $global:_verbosity = $value
    env_set_verbosity $value
}
# Returns the number of elements in a value.
#   $null      -> 0
#   string     -> number of characters
#   collection -> element count (arrays, hashtables, and any other type that
#                 implements System.Collections.ICollection, such as ArrayList
#                 or List[T])
#   other      -> 0
function len([object] $value) {
    if ($null -eq $value) {
        return 0
    }

    $type = $value.gettype().name.tolower()
    write-debug "<len> type: '${type}'."

    if ($value -is [string]) {
        return $value.length
    }

    if ($value -is [System.Collections.ICollection]) {
        return $value.count
    }

    return 0
}
# True when `value` begins with `prefix` (uses String.StartsWith).
function starts_with([string] $value, [string] $prefix) {
    if ($value -isnot [string]) {
        throw 'Argument `value` must be a string.'
    }

    $matched = $value.startswith($prefix)
    return $matched
}
# Maps a repository-relative path to a path beneath the local source directory.
#   path - relative path; surrounding whitespace and leading/trailing slashes
#          or backslashes are ignored.
# Returns the local source directory itself when no usable path is given,
# otherwise the joined path passed through `normalize_path`.
function to_local_path([string] $path) {
    write-debug "<to_local_path> path: '${path}'."

    # A [string] parameter is never $null (a missing value arrives as ''), so
    # detect "no path" by content rather than by comparing against $null.
    $relative = ''
    if (-not [string]::IsNullOrWhiteSpace($path)) {
        $relative = $path.trim().trim('/', '\')
    }

    if ($relative.length -eq 0) {
        return $(get_local_srcdir)
    }

    $out = join-path $(get_local_srcdir) $relative
    return $(normalize_path $out)
}
# Returns the lower-cased form of a string value.
function to_lower([string] $value) {
    if ($value -is [string]) {
        return $value.tolower()
    }

    return $value
}
# Returns the lower-cased type name of an object, or 'null' for $null.
#   full_name - return the namespace-qualified name instead of the short name.
function typeof([object] $object, [switch] $full_name = $false) {
    if ($null -eq $object) {
        return 'null'
    }

    $type = $object.gettype()

    # Cannot use the `to_lower` function here, as it would cause a recursion failure.
    $name = $type.name
    if ($full_name) {
        $name = $type.fullname
    }

    return $name.tolower()
}
# Reports a usage error, cleans up the session, and terminates with exit
# code 254.
function usage_exit([string] $message) {
    $text = "usage: $message"
    write-error $text
    cleanup_after
    exit 254
}
# Returns `default` when `value` is "empty", otherwise returns `value`.
# A value counts as empty when it is $null, an Int32 or Double equal to zero,
# a zero-length string, or an array or hashtable without elements.
# Booleans are never treated as empty, so $false is returned unchanged.
function value_or_default([object] $value, [object] $default) {
    if ($null -eq $value) {
        return $default
    }

    if (($value -is [int]) -and ($value -eq 0)) {
        return $default
    }

    if (($value -is [double]) -and ($value -eq 0.0)) {
        return $default
    }

    if (($value -is [string]) -and ($value.length -eq 0)) {
        return $default
    }

    # Test the type itself: an array's type name is e.g. 'object[]', never
    # 'array', so a name comparison cannot detect arrays.
    if (($value -is [array]) -and ($value.count -eq 0)) {
        return $default
    }

    if (($value -is [hashtable]) -and ($value.count -eq 0)) {
        return $default
    }

    return $value
}
# Writes text to a file located beneath the local source directory.
#   content   - text to write.
#   path      - file path, resolved through `to_local_path`; must not be empty.
#   overwrite - remove an existing file at the target before writing.
function write_content([string] $content, [string] $path, [switch] $overwrite) {
    if ($null -eq $content) {
        throw 'Argument `content` cannot be `null`.'
    }
    if (is_empty $path) {
        throw 'Argument `path` cannot be `null` or empty.'
    }

    write-debug "<write_content> content = $($content.length) bytes."
    write-debug "<write_content> path = '${path}'."

    $target = to_local_path $path
    write-debug "<write-content> '${target}'."

    $must_remove = $overwrite -and (test-path $target)
    if ($must_remove) {
        remove-item $target | out-null
    }

    $content | out-file $target
}
# Low-level output routine behind the `write-*` helpers.
#   value      - text to emit.
#   color      - foreground color for terminal output; ignored when empty.
#   no_newline - suppress the trailing newline.
# On a terminal the text goes through `write-host`; otherwise it is emitted
# with `write-output`, with a newline appended unless `no_newline` is set.
function __write([string] $value, [string] $color, [bool] $no_newline) {
    if ($null -eq $value) {
        return
    }

    if (-not (is_tty)) {
        if (-not($no_newline)) {
            $value = "${value}`n"
        }
        write-output $value
        return
    }

    $opts = @{
        NoNewline = $no_newline
    }
    if (($null -ne $color) -and ($(len $color) -gt 0)) {
        $opts.ForegroundColor = $color
    }

    write-host $value @opts
}
# Emits text only when verbosity is DETAILED.
function write-detailed([string] $value, [string] $color = $null, [switch] $no_newline) {
    if ('DETAILED' -ne $(get_verbosity)) {
        return
    }

    __write $value $color $no_newline
}
# Emits text in the error color at every verbosity level.
# Being a function, this takes precedence over the built-in Write-Error cmdlet.
function write-error([string] $value) {
    write-minimal $value -color $global:colors.error -no_newline:$false
}
# Reports a failed test: a colored tag followed by the text on a terminal, or
# a single plain line when output is redirected.
function write-failed([string] $value) {
    if (-not (is_tty)) {
        write-output " Test: [Failed] ${value}"
        return
    }

    write-normal ' [Failed]' $global:colors.test.failed -no_newline
    write-normal " ${value}"
}
# Emits text in the high-emphasis color at every verbosity level.
function write-high([string] $value, [switch] $no_newline) {
    write-minimal $value -color $global:colors.high -no_newline:$no_newline
}
# Emits text in the low-emphasis color; shown only when verbosity is DETAILED.
function write-low([string] $value, [switch] $no_newline) {
    write-detailed $value -color $global:colors.low -no_newline:$no_newline
}
# Emits text in the medium-emphasis color; hidden when verbosity is MINIMAL.
function write-medium([string] $value, [switch] $no_newline) {
    write-normal $value -color $global:colors.medium -no_newline:$no_newline
}
# Emits text at every verbosity level.
function write-minimal([string] $value, [string] $color = $null, [switch] $no_newline) {
    __write -value $value -color $color -no_newline $no_newline
}
# Emits text unless verbosity is MINIMAL.
function write-normal([string] $value, [string] $color = $null, [switch] $no_newline) {
    if ('MINIMAL' -eq $(get_verbosity)) {
        return
    }

    __write -value $value -color $color -no_newline $no_newline
}
# Reports a passed test: on a terminal a colored tag followed by the text
# (both shown only when verbosity is DETAILED), or a single plain line when
# output is redirected.
function write-passed([string] $value) {
    if (-not (is_tty)) {
        write-output " Test: [Passed] ${value}"
        return
    }

    write-detailed ' [Passed]' $global:colors.test.passed -no_newline
    write-detailed " ${value}"
}
# Emits text in the title color at every verbosity level.
function write-title([string] $value) {
    $title_color = $global:colors.title
    write-minimal $value $title_color
}
# Emits text in the warning color at every verbosity level.
# Being a function, this takes precedence over the built-in Write-Warning cmdlet.
function write-warning([string] $value) {
    $warning_color = $global:colors.warning
    write-minimal $value $warning_color
}
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Fail fast on uninitialized variables and missing properties in everything below.
set-strictmode -version latest
# Dot-source the shared build helpers so their functions and globals are
# defined in this scope; the path is built from the repository root reported by git.
. "$(& git rev-parse --show-toplevel)/deploy/Kubernetes/_build/common.ps1"
# == begin common.ps1 extensions ==
# Session value for the "print template" option; $null until `get_print_template`
# or `set_print_template` assigns it.
$global:_print_template = $null
# Fallback for the print-template option when nothing else specifies it: off.
function default_print_template {
    $fallback = $false
    write-debug "<default_print_template> -> ${fallback}."
    return $fallback
}
# Reports whether NVBUILD_PRINT_TEMPLATE is defined; its content is not inspected.
function env_get_print_template {
    $is_defined = $null -ne $env:NVBUILD_PRINT_TEMPLATE
    write-debug "<env_get_print_template> -> '${is_defined}'."
    return $is_defined
}
# Publishes the print-template option through NVBUILD_PRINT_TEMPLATE:
# '1' when enabled, removed when disabled.
# Does nothing when NVBUILD_NOSET is defined.
function env_set_print_template([bool] $value) {
    if ($null -ne $env:NVBUILD_NOSET) {
        return
    }

    write-debug "<env_set_print_template> value: ${value}."

    if (-not $value) {
        $env:NVBUILD_PRINT_TEMPLATE = $null
        return
    }

    $env:NVBUILD_PRINT_TEMPLATE = '1'
}
# Returns the print-template option, resolving it on first use from the
# environment and falling back to the default when the environment has none.
function get_print_template {
    if ($null -eq $global:_print_template) {
        $from_env = env_get_print_template
        if ($null -eq $from_env) {
            set_print_template $(default_print_template)
        }
        else {
            set_print_template $from_env
        }
    }

    write-debug "<get_print_template> -> ${global:_print_template}."
    return $global:_print_template
}
# Stores the print-template option for this session and mirrors it to the
# environment.
function set_print_template([bool] $value) {
    write-debug "<set_print_template> value: ${value}."

    $global:_print_template = $value
    env_set_print_template $value
}
# === end common.ps1 extensions ===
# Parses the command line of a chart test script and prepares the test run.
#   params - arguments passed to the calling script.
#   tests  - list of test objects; each is expected to expose a `name`.
# With no arguments the usage text is printed and the script exits with 0.
# Invalid input ends the script through `usage_exit`; a missing directory ends
# it through `fatal_exit`.
# Returns a hashtable with:
#   chart_path - parent directory of the calling script's directory
#   command    - 'LIST' or 'TEST'
#   tests      - the tests to run (narrowed down when -t/--test was given)
function initialize_test([string[]]$params, [object[]] $tests) {
    if (($null -eq $params) -or ($null -eq $tests)) {
        write-error 'usage: initialize_test {params} {tests}'
        write-error ' {params} list of argument passed to the script.'
        write-error ' {tests} list of test objects.'
        write-error ' '
        usage_exit 'initialize_test {params} {tests}.'
    }

    write-debug "<initialize_test> params: [$(len $params)]."
    write-debug "<initialize_test> tests: [$(len $tests)]."

    $command = $null
    $is_debug = $false
    $is_verbosity_specified = $false
    $test_filter = @()

    if (0 -eq $(len $params)) {
        write-title './test-chart <command> [<options>]'
        write-high 'commands:'
        write-normal ' list Prints a list of available tests and quits.'
        write-normal ' test Executes available tests. (default)'
        write-normal ''
        write-high 'options:'
        write-normal ' --print|-p Prints the output of the ''helm template'' command to the terminal.'
        write-normal ' -t:<test> Specifies which tests to run. When not provided all tests will be run.'
        write-normal ' Use ''list'' to determine which tests are available.'
        write-normal ' -v:<verbosity> Enables verbose output from the test scripts.'
        write-normal ' verbosity:'
        write-normal ' minimal|m: Sets build-system verbosity to minimal. (default)'
        write-normal ' normal|n: Sets build-system verbosity to normal.'
        write-normal ' detailed|d: Sets build-system verbosity to detailed.'
        write-normal ' --debug Enables verbose build script tracing; this has no effect on build-system verbosity.'
        write-normal ''
        cleanup_after
        exit 0
    }

    for ($i = 0 ; $i -lt $(len $params) ; $i += 1) {
        $arg = $params[$i]
        $arg2 = $null

        # Options may carry their value inline as `<option>:<value>`. Split on
        # the first colon only, so a value that itself contains ':' stays intact.
        $pair = $arg -split ':', 2
        if ($(len $pair) -gt 1) {
            $arg = $pair[0]
            if ($(len $pair[1]) -gt 0) {
                $arg2 = $pair[1]
            }
        }

        # The first argument may name the command; anything else implies 'test'
        # and is then processed as an option below.
        if ($i -eq 0) {
            if ('list' -ieq $arg) {
                $command = 'LIST'
                continue
            }
            elseif ('test' -ieq $arg) {
                $command = 'TEST'
                continue
            }
            else {
                $command = 'TEST'
            }
        }

        if ('--debug' -ieq $arg) {
            $is_debug = $true
        }
        elseif (('--print' -ieq $arg) -or ('-p' -ieq $arg)) {
            if ('TEST' -ne $command) {
                usage_exit "Option '${arg}' not supported by command 'list'."
            }
            if (get_print_template) {
                usage_exit "Option '${arg}' already specified."
            }
            set_print_template $true
        }
        elseif (('--test' -ieq $arg) -or ('-t' -ieq $arg)) {
            if ($null -eq $arg2) {
                # No inline value: the value is the next argument.
                if ($i + 1 -ge $(len $params)) {
                    usage_exit "Expected value following ""${arg}""."
                }
                $i += 1
                $test_name = $params[$i]
            }
            else {
                $test_name = $arg2
            }

            # Only the last '/'-separated segment identifies the test.
            $parts = $test_name.split('/')
            if ($(len $parts) -gt 1) {
                $test_name = $parts[$(len $parts) - 1]
            }

            $test_found = $false
            foreach ($test in $tests) {
                if ($test.name -ieq $test_name) {
                    $test_found = $true
                    break
                }
            }
            if (-not $test_found) {
                usage_exit "Unknown test name ""${test_name}"" provided."
            }

            $test_filter += $test_name
        }
        elseif (('--verbosity' -ieq $arg) -or ('-v' -ieq $arg)) {
            if ($null -eq $arg2) {
                # No inline value: the value is the next argument.
                if ($i + 1 -ge $(len $params)) {
                    usage_exit "Expected value following ""${arg}""."
                }
                $i += 1
                $value = $params[$i]
            }
            else {
                $value = $arg2
            }

            if (('minimal' -ieq $value) -or ('m' -ieq $value)) {
                $verbosity = 'MINIMAL'
            }
            elseif (('normal' -ieq $value) -or ('n' -ieq $value)) {
                $verbosity = 'NORMAL'
            }
            elseif (('detailed' -ieq $value) -or ('d' -ieq $value)) {
                $verbosity = 'DETAILED'
            }
            else {
                # Report the rejected value, not the option that carried it.
                usage_exit "Invalid verbosity option ""${value}""."
            }

            $(set_verbosity $verbosity)
            $is_verbosity_specified = $true
        }
        else {
            usage_exit "Unknown option '${arg}'."
        }
    }

    $is_debug = $is_debug -or $(get_is_debug)
    set_is_debug $is_debug

    $tests_path = split-path -parent $myinvocation.pscommandpath
    $root_path = split-path -parent $tests_path
    $chart_path = join-path $root_path 'chart'

    if (-not $(test-path $root_path)) {
        fatal_exit "Expected path '${root_path}' not found or inaccessible."
    }
    if (-not $(test-path $chart_path)) {
        fatal_exit "Expected path '${chart_path}' not found or inaccessible."
    }
    if (-not $(test-path $tests_path)) {
        fatal_exit "Expected path '${tests_path}' not found or inaccessible."
    }

    write-debug "<initialize_test> root_path = '${root_path}'."
    write-debug "<initialize_test> chart_path = '${chart_path}'."
    write-debug "<initialize_test> tests_path = '${tests_path}'."

    # When a subset of tests has been requested, filter out the not requested tests.
    if ($(len $test_filter) -gt 0) {
        write-debug "<initialize_test> selected: [$(len $test_filter)]."
        $replace = @()

        # Find the test that matches each selected item and add it to a replacement list.
        foreach ($filter in $test_filter) {
            foreach ($test in $tests) {
                if ($test.name -ieq $filter) {
                    $replace += $test
                    break
                }
            }
        }

        # Replace the test list with the replacement list.
        $tests = $replace
        write-debug "<initialize_test> tests = [$(len $tests)]."
    }

    if ((-not $is_verbosity_specified) -and (-not $(is_tty))) {
        write-debug "<initialize_test> override verbosity with 'detailed' when TTY not detected."
        set_verbosity 'DETAILED'
    }

    # `chart_path` carries the root path: the chart and the test value files
    # are addressed relative to it as 'chart' and 'tests'.
    return @{
        chart_path = $root_path
        command = $command
        tests = $tests
    }
}
# Prints the available tests; the amount of detail follows the verbosity level.
#   config - configuration object returned by `initialize_test`; its command
#            must be 'LIST'.
# MINIMAL prints test names only. NORMAL adds match/option counts and the
# values files. DETAILED lists every match expression and option.
# Throws when `config.command` is not 'LIST'.
function list_helm_tests([object] $config) {
    if ($null -eq $config.tests) {
        write-error 'usage: list_helm_tests {config}' -category InvalidArgument
        write-error ' {config} configuration object returned by `initialize_test`.'
        write-error ' '
        usage_exit 'list_helm_tests {config}.'
    }
    if (($null -eq $config.tests) -or ($null -eq $config.command) -or ($null -eq $config.chart_path)) {
        fatal_exit 'invalid configuration object received.'
    }

    write-debug "<list_helm_tests> config.chart_path = '$($config.chart_path)'."
    write-debug "<list_helm_tests> config.command = '$($config.command)'."
    write-debug "<list_helm_tests> config = [$(len $config.tests)]"

    if ('LIST' -ne $config.command) {
        throw "List method called when command was 'test'."
    }

    write-title "Available tests:"

    foreach ($test in $config.tests) {
        if ('MINIMAL' -eq $(get_verbosity)) {
            write-minimal "- $($test.name)"
            continue
        }

        write-high "- $($test.name):"

        if ('DETAILED' -eq $(get_verbosity)) {
            write-detailed ' matches:'
            # Evaluate `len` first and compare its result; written without the
            # subexpression, `-gt 0` would be handed to `len` as arguments.
            if ($(len $test.matches) -gt 0) {
                foreach ($match in $test.matches) {
                    $regex = generate_regex $match
                    write-low " ${regex}"
                }
            }
            else {
                write-low ' <none>'
            }

            write-detailed ' options:'
            if ($(len $test.options) -gt 0) {
                foreach ($option in $test.options) {
                    write-low " ${option}"
                }
            }
            else {
                write-low ' <none>'
            }
        }
        else {
            $matches_count = 0
            if (($null -ne $test.matches)) {
                $matches_count = $(len $test.matches)
            }

            $options_count = 0
            if (($null -ne $test.options)) {
                $options_count = $(len $test.options)
            }

            write-normal " matches: ${matches_count}"
            if ($options_count -gt 0) {
                write-normal " options: " -no_newline
                write-normal "${options_count}" $global:colors.low
            }
        }

        write-normal ' values:'
        if ($(len $test.values) -gt 0) {
            foreach ($value in $test.values) {
                write-normal " ${value}"
            }
        }
        else {
            write-normal ' <none>'
        }
    }

    $(cleanup_after)
}
# Runs every test in a configuration object against the chart with `helm template` and
# prints a per-test result followed by a summary line.
#
# For each test the command is built from the chart's own `chart/values.yaml`, then the
# test's values files (resolved under `tests/`), then one `--set` per option. A test passes
# when Helm's exit code equals `test.expected` and every entry of `test.matches` (converted
# by `generate_regex`) matches the captured output.
#
# Parameters:
#   config - configuration object with `chart_path`, `command` and `tests` members. When
#            `command` is 'LIST' the call is delegated to `list_helm_tests`.
#
# Returns $true when `command` is 'LIST' or when no test failed, otherwise $false.
function test_helm_chart([object] $config) {
    write-debug "<test_helm_chart> config.chart_path = '$($config.chart_path)'."
    write-debug "<test_helm_chart> config.command = '$($config.command)'."
    write-debug "<test_helm_chart> config = [$(len $config.tests)]."

    if ('LIST' -eq $config.command) {
        list_helm_tests $config
        return $true
    }

    $timer = [System.Diagnostics.Stopwatch]::StartNew()

    $fail_count = 0
    $pass_count = 0
    $total_fail_checks = 0
    $total_pass_checks = 0

    push-location $config.chart_path
    try {
        foreach ($test in $config.tests) {
            $fail_checks = 0
            $pass_checks = 0

            $values_path = resolve-path $(join-path 'chart' 'values.yaml') -relative
            write-debug "<test_helm_chart> values_path = '${values_path}'."

            $helm_command = "helm template test -f ${values_path}"
            write-debug "<test_helm_chart> helm_command = '${helm_command}'."

            # First add all values files to the command.
            if ($(len $test.values) -gt 0) {
                foreach ($value in $test.values) {
                    write-debug "<test_helm-chart> value = '${value}'."
                    $values_path = $(resolve-path $(join-path 'tests' $value) -relative)
                    write-debug "<test_helm_chart> values_path = '${values_path}'."
                    $helm_command = "${helm_command} -f $values_path"
                }
                write-debug "<test_helm_chart> helm_command = '${helm_command}'."
            }

            # Second add all --set options to the command.
            if ($(len $test.options) -gt 0) {
                foreach ($option in $test.options) {
                    write-debug "<test_helm_chart> option = '${option}'."
                    $helm_command = "${helm_command} --set `"${option}`""
                }
            }

            $helm_command = "${helm_command} ./chart/."
            write-debug "<test_helm_chart> helm_command = '${helm_command}'."

            # stderr is merged into the captured text so failure messages can be matched too.
            $captured = invoke-expression "${helm_command} 2>&1" | out-string
            $exit_code = $LASTEXITCODE

            write-debug "<test_helm_chart> expected = $($test.expected)."
            write-debug "<test_helm_chart> actual = ${exit_code}."

            $is_pass = $test.expected -eq $exit_code
            if (-not $is_pass) {
                write-normal ">> Failed: exit code ${exit_code} did not match expected $($test.expected)." $global:colors.low

                # When the exit code is an unexpected non-zero value, print Helm's output.
                if ($exit_code -ne 0) {
                    # Disable template printing to avoid a double print.
                    # NOTE(review): this is not restored here, so it presumably stays off for
                    # the remaining tests as well — confirm against `set_print_template`.
                    set_print_template $false
                    write-minimal "Helm Template Output" $global:colors.high
                    write-minimal $captured $global:colors.low
                }
            }

            if ($(len $test.matches) -gt 0) {
                foreach ($match in $test.matches) {
                    $regex = generate_regex $match
                    write-debug "<test_helm_chart> regex = '${regex}'."

                    $is_match = $captured -match $regex
                    write-debug "<test_helm_chart> is_match = ${is_match}."

                    if (-not $is_match) {
                        write-normal ">> Failed: output did not match: ""${regex}""." $global:colors.low
                    }

                    if ($is_match) {
                        $pass_checks += 1
                    }
                    else {
                        $fail_checks += 1
                        $is_pass = $false
                    }
                }
            }

            $total_fail_checks += $fail_checks
            $total_pass_checks += $pass_checks

            if (get_print_template) {
                write-normal "Helm Template Output" $global:colors.high
                write-normal $captured $global:colors.low
            }

            if ($is_pass) {
                $pass_count += 1
                write-passed "$($test.name) (passed ${pass_checks} of $($fail_checks + $pass_checks) checks)"
            }
            else {
                $fail_count += 1
                write-failed "$($test.name) (failed ${fail_checks} of $($fail_checks + $pass_checks) checks)"
                write-low " command: $($config.chart_path)> ${helm_command}"
            }
        }
    }
    finally {
        # Always return to the caller's location, whether or not a test threw.
        pop-location
    }

    $timer.stop()

    $is_success = $fail_count -eq 0

    # NOTE(review): each summary alternates write-minimal / write-normal segments that carry
    # overlapping text (e.g. "Passed" or "Tests" twice); whether both print at the same
    # verbosity depends on the write-* helpers — confirm.
    if (-not $is_success) {
        write-minimal "Failed: ${fail_count}" $global:colors.test.failed -no_newline
        write-normal ", Passed: ${pass_count} ($total_pass_checks) [${total_fail_checks}]" $global:colors.test.failed -no_newline
        write-minimal ", Passed: ${pass_count} ($total_pass_checks)" $global:colors.test.failed -no_newline
        write-normal ", Tests: $(len $config.tests) [$($total_fail_checks + $total_pass_checks)]" $global:colors.test.failed -no_newline
        write-normal " ($('{0:0.000}' -f $timer.elapsed.totalseconds) seconds)" $global:colors.low -no_newline
        write-minimal ''
    }
    else {
        write-minimal "Passed: ${pass_count}" $global:colors.test.passed -no_newline
        write-normal ", Tests: $(len $config.tests) [${total_pass_checks}]" $global:colors.test.passed -no_newline
        write-minimal ", Tests: $(len $config.tests)" $global:colors.test.passed -no_newline
        write-normal ", [$($total_fail_checks + $total_pass_checks)]" $global:colors.test.passed -no_newline
        write-normal " ($('{0:0.000}' -f $timer.elapsed.totalseconds) seconds)" $global:colors.low
        write-minimal ''
    }

    # Cleanup has to happen before returning; placed after the `return` statements it never
    # ran. Any pipeline output is discarded so the function still returns a single boolean.
    $null = cleanup_after

    return $is_success
}
# Converts a test "match" entry into the regular expression used against Helm's output.
#
# Parameters:
#   match - either a hashtable with `indent` and `lines` members, or any other value,
#           which is returned unchanged.
#
# For a hashtable, every line has its regex metacharacters escaped, is prefixed with
# `\s{indent}` when `indent` is greater than zero, and is suffixed with a pattern that
# accepts trailing whitespace plus one or two line-break characters. The per-line
# patterns are concatenated in order.
function generate_regex([object] $match) {
    $is_structured = 'hashtable' -eq $(typeof $match)

    if (-not $is_structured) {
        # Anything that is not a hashtable is passed through as the pattern itself.
        $regex = $match
    }
    else {
        write-debug "<generate_regex> match is hashtable"
        write-debug "<generate_regex> indent: $($match.indent)."
        write-debug "<generate_regex> match.lines: [$(len $match.lines)]."

        $leading = if ($match.indent -gt 0) { "\s{$($match.indent)}" } else { '' }

        $regex = ''
        foreach ($text in $match.lines) {
            # Escape every character that has a special meaning in a regular expression.
            $escaped = $text -replace '([\.\*\+\?\^\$\{\}\(\)|[\]\\])', '\$1'
            $regex += "${leading}${escaped}\s*[\n\r]{1,2}"
        }
    }

    write-debug "<generate_regex> -> '${regex}'."
    return $regex
}
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Image used to run the Helm chart tests: Ubuntu + Git + PowerShell + Helm.
ARG BASE_IMAGE="ubuntu:noble"
FROM ${BASE_IMAGE}

# Set useful labels.
LABEL "base"="ubuntu:noble"
LABEL "configuration"="tester"
LABEL "version"="devel"

# Stop APT (Debian package manager) from complaining about interactivity.
ENV DEBIAN_FRONTEND=noninteractive

# Set additional environment values that make usage more pleasant.
ENV TERM=xterm-256color

# Update / upgrade the base image.
RUN apt-get update \
 && apt-get upgrade --yes \
 && rm -rf /var/lib/apt/lists/*

# Install pre-requisites.
RUN apt-get update \
 && apt-get install --no-install-recommends --yes --fix-missing \
        apt-transport-https \
        git \
        software-properties-common \
        wget \
 && rm -rf /var/lib/apt/lists/*

# Download and register the Microsoft repository keys.
RUN wget -q https://packages.microsoft.com/config/ubuntu/24.04/packages-microsoft-prod.deb \
 && dpkg -i packages-microsoft-prod.deb \
 && rm packages-microsoft-prod.deb

# Install Powershell runtime and terminal.
# The package lists are removed in the same layer so they do not persist in the image.
RUN apt-get update \
 && apt-get install --yes \
        powershell \
 && rm -rf /var/lib/apt/lists/*

# Helm release to install; override with `--build-arg HELM_VERSION=<x.y.z>`.
# Declared next to its only use so changing it does not invalidate the layers above.
ARG HELM_VERSION="3.17.0"

# Download Helm and move it to /usr/local/bin.
# Only the `helm` binary is extracted; the archive and the extraction directory are
# removed in the same layer so they do not add to the image size.
RUN wget -q "https://get.helm.sh/helm-v${HELM_VERSION}-linux-amd64.tar.gz" \
 && tar -zxf "helm-v${HELM_VERSION}-linux-amd64.tar.gz" linux-amd64/helm \
 && mv linux-amd64/helm /usr/local/bin/helm \
 && rm -rf "helm-v${HELM_VERSION}-linux-amd64.tar.gz" linux-amd64

# Enable Git operations in the /workspace directory.
RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig

# Make /workspace the default folder; WORKDIR creates the directory when it is missing.
WORKDIR /workspace
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Chart metadata for the dynamo component chart.
# Chart API version (`v2` is the format introduced with Helm 3).
apiVersion: v2
# Version of the application packaged by this chart (exposed to templates as `.Chart.AppVersion`).
appVersion: 1.0.0
description: Distributed Neural Models (dynamo) Component
icon: https://www.nvidia.com/content/dam/en-zz/Solutions/about-nvidia/logo-and-brand/01-nvidia-logo-vert-500x200-2c50-d@2x.png
# NOTE(review): Helm's chart conventions recommend lowercase letters, digits and dashes for
# chart names; the underscore may be flagged by linting — confirm before renaming, since
# the name is exposed to templates as `.Chart.Name`.
name: dynamo_component
# Version of the chart itself (exposed to templates as `.Chart.Version`).
version: 1.0.0
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Annotation Groups
{{- /*
nvidia.annotations.default
Emits the `dynamo` annotation, formatted as `<release name>.<chart appVersion>` (with "0.0"
substituted when appVersion is empty), followed by the entries of
`.Values.kubernetes.annotations` rendered with `toYaml` when that value is set.
Expects the root context as `.`.
*/ -}}
{{- define "nvidia.annotations.default" }}
dynamo: "{{ .Release.Name }}.{{ .Chart.AppVersion | default "0.0" }}"
{{- with .Values.kubernetes }}
{{- with .annotations }}
{{ toYaml . }}
{{- end }}
{{- end }}
{{- end -}}
{{- /*
nvidia.annotations.chart
Emits the `helm.sh/chart` annotation (the quoted chart name) followed by everything
produced by `nvidia.annotations.default`. Expects the root context as `.`.
*/ -}}
{{- define "nvidia.annotations.chart" }}
helm.sh/chart: {{ .Chart.Name | quote }}
{{- template "nvidia.annotations.default" . }}
{{- end -}}
# Label Groups
{{- /*
nvidia.labels.default
Emits the instance, name, part-of and version labels, in that order, by chaining the
matching `nvidia.label.*` templates. Expects the root context as `.`.
*/ -}}
{{- define "nvidia.labels.default" }}
{{- template "nvidia.label.appInstance" . }}
{{- template "nvidia.label.appName" . }}
{{- template "nvidia.label.appPartOf" . }}
{{- template "nvidia.label.appVersion" . }}
{{- end -}}
{{- /*
nvidia.labels.chart
Emits the full label set: the default labels, the managed-by and chart labels, the entries
of `.Values.kubernetes.labels` rendered with `toYaml` when set, and finally the `release`
label. Expects the root context as `.`.
NOTE(review): because `.labels` is spliced into a mapping with `toYaml`, it presumably has
to be a map of label name to value — confirm against the values schema.
*/ -}}
{{- define "nvidia.labels.chart" }}
{{- template "nvidia.labels.default" . }}
{{- template "nvidia.label.appManagedBy" . }}
{{- template "nvidia.label.chart" . }}
{{- with .Values.kubernetes }}
{{- with .labels }}
{{ toYaml . }}
{{- end }}
{{- end }}
{{- template "nvidia.label.release" . }}
{{- end -}}
# Label Values
{{- /*
nvidia.label.appInstance
Emits `app.kubernetes.io/instance` set to the release name.
*/ -}}
{{- define "nvidia.label.appInstance" }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{- /*
nvidia.label.appManagedBy
Emits `app.kubernetes.io/managed-by` set to `.Release.Service`, falling back to "dynamo"
when that value is empty.
*/ -}}
{{- define "nvidia.label.appManagedBy" }}
{{- $service_name := "dynamo" }}
{{- with .Release.Service }}
{{- $service_name = . }}
{{- end }}
app.kubernetes.io/managed-by: {{ $service_name }}
{{- end }}
{{- /*
nvidia.label.appName
Emits `app.kubernetes.io/name` set to `.Values.component.name`; rendering fails with an
explanatory message when that value is missing.
*/ -}}
{{- define "nvidia.label.appName" }}
app.kubernetes.io/name: {{ required "Property '.component.name' is required." .Values.component.name }}
{{- end }}
{{- /*
nvidia.label.appPartOf
Emits `app.kubernetes.io/part-of` set to `.Values.kubernetes.partOf`, falling back to
"dynamo" when that value is not set.
*/ -}}
{{- define "nvidia.label.appPartOf" }}
{{- $part_of := "dynamo" }}
{{- with .Values.kubernetes }}
{{- with .partOf }}
{{- $part_of = . }}
{{- end }}
{{- end }}
app.kubernetes.io/part-of: {{ $part_of }}
{{- end }}
{{- /*
nvidia.label.appVersion
Emits `app.kubernetes.io/version` set to the quoted chart version ("0.0" when empty).
NOTE(review): this reads `.Chart.Version`, not `.Chart.AppVersion` — confirm that the chart
version rather than the application version is intended for this label.
*/ -}}
{{- define "nvidia.label.appVersion" }}
app.kubernetes.io/version: {{ .Chart.Version | default "0.0" | quote }}
{{- end }}
{{- /*
nvidia.label.chart
Emits `helm.sh/chart` (the quoted chart name) and `helm.sh/version` (the quoted chart
version, "0.0" when empty).
*/ -}}
{{- define "nvidia.label.chart" }}
helm.sh/chart: {{ .Chart.Name | quote }}
helm.sh/version: {{ .Chart.Version | default "0.0" | quote }}
{{- end }}
{{- /*
nvidia.label.release
Emits the `release` label formatted as `<chart name>_v<chart version>` ("0.0" when the
chart version is empty).
*/ -}}
{{- define "nvidia.label.release" }}
release: "{{ .Chart.Name }}_v{{ .Chart.Version | default "0.0" }}"
{{- end }}
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{{- /*
Required identity values: the component name (with its optional, lower-cased namespace)
and the container image name. Rendering fails when `.component`, `.component.name`,
`.image` or `.image.name` is missing.

`$component_namespace` is declared here, before the `.component` block assigns to it:
Go templates reject an assignment (`=`) to a variable that has not been declared (`:=`),
and declaring it after the block would overwrite any user supplied namespace.
*/}}
{{- $component_name := "" }}
{{- $component_namespace := "default" }}
{{- with $.Values.component }}
{{-   $component_name = required "Property `.component.name` is required." .name }}
{{-   with .namespace }}
{{-     $component_namespace = (lower .) }}
{{-   end }}
{{- else }}
{{-   fail "Property `.component` is required." }}
{{- end }}
{{- $container_image_name := "" }}
{{- with $.Values.image }}
{{-   $container_image_name = required "Property `.image.name` is required." .name }}
{{- else }}
{{-   fail "Property `.image` is required." }}
{{- end }}
{{- /*
Defaults for every optional setting. Each variable below keeps its default unless a later
block in this template overrides it from `.Values`.
*/}}
{{- $instance_count := 1 }}
{{- $kube_grace_period := 30 }}
{{- $kube_liveness_delay := 10 }}
{{- $kube_liveness_enabled := true }}
{{- $kube_liveness_fail := 15 }}
{{- $kube_liveness_period := 2 }}
{{- $kube_liveness_success := 1 }}
{{- $kube_readiness_delay := 10 }}
{{- $kube_readiness_enabled := true }}
{{- $kube_readiness_fail := 15 }}
{{- $kube_readiness_period := 2 }}
{{- $kube_readiness_success := 1 }}
{{- $kube_tolerations_count := 0 }}
{{- $model_repository_path := "/var/run/models" }}
{{- $parallel_pipeline := 1 }}
{{- $parallel_tensor := 1 }}
{{- $parallel_world := 1 }}
{{- $port_api := 443 }}
{{- $port_health := 8000 }}
{{- $port_metrics := 9347 }}
{{- $port_request := 9345 }}
{{- $request_plane_etcd_url := "" }}
{{- $request_plane_nats_url := "" }}
{{- /*
The request plane has no usable default: `.distributed`, `.distributed.requestPlane` and
both of its URLs are required, and rendering fails when any of them is missing.
*/}}
{{- with $.Values.distributed }}
{{- with .requestPlane }}
{{- $request_plane_etcd_url = required "Property `.distributed.requestPlane.etcdUrl` is required." .etcdUrl }}
{{- $request_plane_nats_url = required "Property `.distributed.requestPlane.natsUrl` is required." .natsUrl }}
{{- else }}
{{- fail "Property `.distributed.requestPlane` is required." }}
{{- end }}
{{- else }}
{{- fail "Property `.distributed` is required." }}
{{- end }}
{{- $request_plane_timeout := 60 }}
{{- $resources_cpu := 4 }}
{{- $resources_ephemeral := "1Gi" }}
{{- $resources_gpu := 1 }}
{{- $resources_memory := "16Gi" }}
{{- $resources_shmem := "512Mi" }}
{{- $worker_count := 1 }}
{{- /*
Overrides taken from `.Values.kubernetes`: liveness/readiness probe settings, the
termination grace period and the number of user supplied tolerations.
Every numeric override is converted with `int` and must be greater than zero, otherwise
rendering fails. Because each override is read through `with`, an unset (or zero) value is
skipped and the default is kept.
*/}}
{{- with $.Values.kubernetes }}
{{- with .checks }}
{{- with .liveness }}
{{- /* The probe stays enabled unless `.enabled` is explicitly `false`. */}}
{{- $kube_liveness_enabled = ne false .enabled }}
{{- with .failureThreshold }}
{{- $kube_liveness_fail = (int .) }}
{{- if le $kube_liveness_fail 0 }}
{{- fail "The value of property `.kubernetes.checks.liveness.failureThreshold` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .initialDelaySeconds }}
{{- $kube_liveness_delay = (int .) }}
{{- if le $kube_liveness_delay 0 }}
{{- fail "The value of property `.kubernetes.checks.liveness.initialDelaySeconds` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .periodSeconds }}
{{- $kube_liveness_period = (int .) }}
{{- if le $kube_liveness_period 0 }}
{{- fail "The value of property `.kubernetes.checks.liveness.periodSeconds` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .successThreshold }}
{{- $kube_liveness_success = (int .) }}
{{- if le $kube_liveness_success 0 }}
{{- fail "The value of property `.kubernetes.checks.liveness.successThreshold` must be greater than zero." }}
{{- end }}
{{- end }}
{{- end }}
{{- with .readiness }}
{{- /* The probe stays enabled unless `.enabled` is explicitly `false`. */}}
{{- $kube_readiness_enabled = ne false .enabled }}
{{- with .failureThreshold }}
{{- $kube_readiness_fail = (int .) }}
{{- if le $kube_readiness_fail 0 }}
{{- fail "The value of property `.kubernetes.checks.readiness.failureThreshold` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .initialDelaySeconds }}
{{- $kube_readiness_delay = (int .) }}
{{- if le $kube_readiness_delay 0 }}
{{- fail "The value of property `.kubernetes.checks.readiness.initialDelaySeconds` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .periodSeconds }}
{{- $kube_readiness_period = (int .) }}
{{- if le $kube_readiness_period 0 }}
{{- fail "The value of property `.kubernetes.checks.readiness.periodSeconds` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .successThreshold }}
{{- $kube_readiness_success = (int .) }}
{{- if le $kube_readiness_success 0 }}
{{- fail "The value of property `.kubernetes.checks.readiness.successThreshold` must be greater than zero." }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- with .terminationGracePeriod }}
{{- $kube_grace_period = (int .)}}
{{- if le $kube_grace_period 0 }}
{{- fail "The value of property `.kubernetes.terminationGracePeriod` must be greater than zero." }}
{{- end }}
{{- end }}
{{- /* Only the number of tolerations is recorded here. */}}
{{- with .tolerations }}
{{- if gt (len .) 0 }}
{{- $kube_tolerations_count = (len .) }}
{{- end }}
{{- end }}
{{- end }}
{{- /*
Optional overrides taken from `.Values.distributed`: the request plane timeout and the
worker count, both converted with `int`. Neither value is range checked here.
*/}}
{{- with $.Values.distributed }}
{{- with .requestPlane }}
{{- with .timeout }}
{{- $request_plane_timeout = (int .) }}
{{- end }}
{{- end }}
{{- with .workerCount }}
{{- $worker_count = (int .) }}
{{- end }}
{{- end }}
{{- /*
Optional overrides taken from `.Values.model`: instance count, pipeline/tensor parallelism
and the model repository path. The numeric values must be greater than zero.
NOTE(review): the failure messages below name `.instance.*` although the values are read
from `.model.instance.*` — confirm whether the messages should carry the `.model` prefix.
*/}}
{{- with $.Values.model }}
{{- with .instance }}
{{- with .count }}
{{- $instance_count = (int .) }}
{{- if le $instance_count 0 }}
{{- fail "The value of property `.instance.count` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .parallelism }}
{{- with .pipeline }}
{{- $parallel_pipeline = (int .) }}
{{- if le $parallel_pipeline 0 }}
{{- fail "The value of property `.instance.parallelism.pipeline` must be greater than zero." }}
{{- end }}
{{- end }}
{{- with .tensor }}
{{- $parallel_tensor = (int .) }}
{{- if le $parallel_tensor 0 }}
{{- fail "The value of property `.instance.parallelism.tensor` must be greater than zero." }}
{{- end }}
{{- end }}
{{- /* World size is the product of the pipeline and tensor parallelism. */}}
{{- $parallel_world = mul $parallel_pipeline $parallel_tensor }}
{{- end }}
{{- end }}
{{- with .repository }}
{{- with .path }}
{{- $model_repository_path = . }}
{{- end }}
{{- end }}
{{- end }}
{{- /*
Optional overrides taken from `.Values.ports` (`api`, `health`, `metrics`, `request`).
Each value is converted with `int` and must lie in the range 1..65535, otherwise rendering
fails. This block previously appeared twice, back to back and byte-for-byte identical;
the second copy only repeated the same assignments and checks, so it has been removed.
*/}}
{{- with $.Values.ports }}
{{-   with .api }}
{{-     $port_api = (int .) }}
{{-     if le $port_api 0 }}
{{-       fail "The value of property `.ports.api` must be greater than zero." }}
{{-     end }}
{{-     if gt $port_api 65535 }}
{{-       fail "The value of property `.ports.api` must be less than 65,536." }}
{{-     end }}
{{-   end }}
{{-   with .health }}
{{-     $port_health = (int .) }}
{{-     if le $port_health 0 }}
{{-       fail "The value of property `.ports.health` must be greater than zero." }}
{{-     end }}
{{-     if gt $port_health 65535 }}
{{-       fail "The value of property `.ports.health` must be less than 65,536." }}
{{-     end }}
{{-   end }}
{{-   with .metrics }}
{{-     $port_metrics = (int .) }}
{{-     if le $port_metrics 0 }}
{{-       fail "The value of property `.ports.metrics` must be greater than zero." }}
{{-     end }}
{{-     if gt $port_metrics 65535 }}
{{-       fail "The value of property `.ports.metrics` must be less than 65,536." }}
{{-     end }}
{{-   end }}
{{-   with .request }}
{{-     $port_request = (int .) }}
{{-     if le $port_request 0 }}
{{-       fail "The value of property `.ports.request` must be greater than zero." }}
{{-     end }}
{{-     if gt $port_request 65535 }}
{{-       fail "The value of property `.ports.request` must be less than 65,536." }}
{{-     end }}
{{-   end }}
{{- end }}
{{- /*
Optional overrides taken from `.Values.resources`: CPU count, GPU count and the
ephemeral-storage, memory and shared-memory quantities (copied as given).
The CPU count must be greater than zero; the GPU count must not be negative.
NOTE(review): `.gpu.count` is read through `with`, so a value of 0 is skipped and the
default stays in effect — confirm whether a GPU-less deployment is meant to be possible.
*/}}
{{- with $.Values.resources }}
{{-   with .cpu }}
{{-     $resources_cpu = (int .) }}
{{-     if le $resources_cpu 0 }}
{{-       fail "The value of property `.resources.cpu` must be greater than zero." }}
{{-     end }}
{{-   end }}
{{-   with .gpu }}
{{-     with .count }}
{{-       $resources_gpu = (int .) }}
{{- /* Validate the GPU count itself; this check used to test the CPU count. */}}
{{-       if lt $resources_gpu 0 }}
{{-         fail "The value of property `.resources.gpu.count` must be greater than or equal to zero." }}
{{-       end }}
{{-     end }}
{{-   end }}
{{-   with .ephemeral }}
{{-     $resources_ephemeral = . }}
{{-   end }}
{{-   with .memory }}
{{-     $resources_memory = . }}
{{-   end }}
{{-   with .sharedMemory }}
{{-     $resources_shmem = . }}
{{-   end }}
{{- end }}
{{- /*
Deployment manifest for the component, rendered from the variables computed above.

Notes:
- Environment variable values are always quoted: Kubernetes requires `env[].value` to be a
  string, and an unquoted number is rejected by the API server.
- `terminationGracePeriodSeconds` uses the validated `.kubernetes.terminationGracePeriod`
  override (default 30) instead of a fixed number.
- GPU node affinity and the GPU toleration are only emitted when the GPU count is non-zero.
*/}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $.Release.Name }}
  namespace: {{ $.Release.Namespace | quote }}
  annotations:
{{- with $ }}
{{- include "nvidia.annotations.chart" . | indent 4 }}
{{- end }}
  labels:
    app: {{ $.Release.Name }}
    app.kubernetes.io/component: {{ $component_name }}
{{- with $ }}
{{- include "nvidia.labels.chart" . | indent 4 }}
{{- end }}
spec:
  selector:
    matchLabels:
      app: {{ $.Release.Name }}
      app.kubernetes.io/component: {{ $component_name }}
  replicas: {{ $instance_count }}
  template:
    metadata:
      annotations:
{{- with $ }}
{{- include "nvidia.annotations.chart" . | indent 8 }}
{{- end }}
      labels:
        app: {{ $.Release.Name }}
        app.kubernetes.io/component: {{ $component_name }}
{{- with $ }}
{{- include "nvidia.labels.chart" . | indent 8 }}
{{- end }}
    spec:
{{- if ne $resources_gpu 0 }}
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: nvidia.com/gpu
                operator: Exists
{{- with $.Values.resources }}
{{- with .gpu }}
{{- with .product }}
{{- if gt (len .) 0 }}
              - key: nvidia.com/gpu.product
                operator: In
                values:
{{ toYaml . | indent 16 }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
      containers:
      - name: server
{{- /*
NOTE(review): with `command: [sh, -c]`, only the first `args` entry is used as the command
string and the remaining entries become positional parameters — confirm that this is the
intended way to start the component.
*/}}
        args:
        - uv
        - run
        - dynamo
        - start
        - --service-name
        - {{ $component_name | quote }}
        command:
        - sh
        - -c
        env:
{{- if gt $parallel_world 1 }}
        - name: DYNAMO_LLM_PP
          value: {{ $parallel_pipeline | quote }}
        - name: DYNAMO_LLM_TP
          value: {{ $parallel_tensor | quote }}
{{- end }}
        - name: DYNAMO_NAME
          value: {{ $component_name | quote }}
{{- if gt (len $component_namespace) 0 }}
        - name: DYNAMO_NAMESPACE
          value: {{ $component_namespace | quote }}
{{- end }}
{{- /* Port variables are only emitted when they differ from the built-in defaults. */}}
{{- if ne $port_health 8000 }}
        - name: DYNAMO_PORT_HEALTH
          value: {{ $port_health | quote }}
{{- end }}
{{- if ne $port_metrics 9347 }}
        - name: DYNAMO_PORT_METRICS
          value: {{ $port_metrics | quote }}
{{- end }}
{{- if ne $port_request 9345 }}
        - name: DYNAMO_PORT_REQUEST
          value: {{ $port_request | quote }}
{{- end }}
        - name: DYNAMO_RP_ETCD_URL
          value: {{ $request_plane_etcd_url }}
        - name: DYNAMO_RP_NATS_URL
          value: {{ $request_plane_nats_url }}
        - name: DYNAMO_RP_TIMEOUT
          value: {{ $request_plane_timeout | quote }}
{{- if gt $worker_count 1 }}
        - name: DYNAMO_WORKER_COUNT
          value: {{ $worker_count | quote }}
{{- end }}
        image: {{ $container_image_name }}
        imagePullPolicy: IfNotPresent
{{- if $kube_liveness_enabled }}
        livenessProbe:
          failureThreshold: {{ $kube_liveness_fail }}
          httpGet:
            path: /v2/health/live
            port: {{ $port_health }}
          initialDelaySeconds: {{ $kube_liveness_delay }}
          periodSeconds: {{ $kube_liveness_period }}
          successThreshold: {{ $kube_liveness_success }}
{{- end }}
        ports:
        - containerPort: {{ $port_health }}
          name: health
        - containerPort: {{ $port_request }}
          name: request
        - containerPort: {{ $port_api }}
          name: api
        - containerPort: {{ $port_metrics }}
          name: metrics
{{- if $kube_readiness_enabled }}
        readinessProbe:
          failureThreshold: {{ $kube_readiness_fail }}
          httpGet:
            path: /v2/health/ready
            port: {{ $port_health }}
          initialDelaySeconds: {{ $kube_readiness_delay }}
          periodSeconds: {{ $kube_readiness_period }}
          successThreshold: {{ $kube_readiness_success }}
{{- end }}
        resources:
          limits:
            cpu: {{ $resources_cpu }}
            ephemeral-storage: {{ $resources_ephemeral }}
{{- if gt $resources_gpu 0 }}
            nvidia.com/gpu: {{ $resources_gpu }}
{{- end }}
            memory: {{ $resources_memory }}
          requests:
            cpu: {{ $resources_cpu }}
            ephemeral-storage: {{ $resources_ephemeral }}
{{- if gt $resources_gpu 0 }}
            nvidia.com/gpu: {{ $resources_gpu }}
{{- end }}
            memory: {{ $resources_memory }}
        volumeMounts:
{{- with $.Values.model }}
{{- with .repository }}
{{- with .volumeMounts }}
{{- range . }}
{{- $mount_path := $model_repository_path }}
{{- $volume_name := required "Property `.modelRepository.volumeMounts[*].name` is required." .name }}
{{- if eq "shared-memory" $volume_name }}
{{- fail "Property `.modelRepository.volumeMounts[*].name` cannot be `shared-memory` because it is a reserved name." }}
{{- end }}
{{- /* A mount path is always placed below the repository path; `..` segments are rejected. */}}
{{- with .path }}
{{- $mount_path = printf "%s/%s" $model_repository_path (trimPrefix "/" .) }}
{{- if regexMatch "/\\.\\./?" $mount_path }}
{{- fail (printf "Value of property `.modelRepository.volumeMounts[*].path` '%s' is illegal because '%s' is not a sub-directory of '%s'." . (clean $mount_path) $model_repository_path) }}
{{- end }}
{{- end }}
        - mountPath: {{ $mount_path }}
          name: {{ $volume_name }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
        - mountPath: /dev/shm
          name: shared-memory
{{- with $.Values.image }}
{{- with .pullSecrets }}
{{- if len . }}
      imagePullSecrets:
{{ toYaml . | indent 6 }}
{{- end }}
{{- end }}
{{- end }}
      restartPolicy: Always
      terminationGracePeriodSeconds: {{ $kube_grace_period }}
{{- if or (gt $resources_gpu 0) (gt $kube_tolerations_count 0) }}
      tolerations:
{{- if gt $resources_gpu 0 }}
      - effect: NoSchedule
        key: nvidia.com/gpu
        operator: Exists
{{- end }}
{{- with $.Values.kubernetes }}
{{- with .tolerations }}
{{ toYaml . | indent 6 }}
{{- end }}
{{- end }}
{{- end }}
      volumes:
{{- with $.Values.model }}
{{- with .repository }}
{{- with .volumeMounts }}
{{- range . }}
      - name: {{ required "Property `.modelRepository.volumeMounts.name` is required." .name }}
        persistentVolumeClaim:
          claimName: {{ required "Property `.modelRepository.volumeMounts.persistentVolumeClaim` is required." .persistentVolumeClaim }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
      - name: shared-memory
        emptyDir:
          medium: Memory
          sizeLimit: {{ $resources_shmem }}
{
"$schema": "https://json-schema.org/draft-07/schema#",
"copyright": [
"SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.",
"SPDX-License-Identifier: Apache-2.0",
"Licensed under the Apache License, Version 2.0 (the \"License\");",
"you may not use this file except in compliance with the License.",
"You may obtain a copy of the License at",
"http://www.apache.org/licenses/LICENSE-2.0",
"Unless required by applicable law or agreed to in writing, software",
"distributed under the License is distributed on an \"AS IS\" BASIS,",
"WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
"See the License for the specific language governing permissions and",
"limitations under the License."
],
"properties": {
"component": {
"properties": {
"name": {
"pattern": "^[a-z]([a-z0-9_\\-]{0,29}[a-z0-9])?$",
"type": "string"
},
"namespace": {
"pattern": "^[a-z]([a-z0-9_\\-]{0,29}[a-z0-9])?$",
"oneOf": [
{ "type": "string" },
{ "type": "null" }
]
}
},
"required": [
"name"
]
},
"image": {
"properties": {
"pullSecrets": {
"oneOf": [
{
"items": [
{
"properties": {
"name": {
"$ref": "#/$defs/kubernetes_label",
"type": "string"
}
},
"type": "object"
}
],
"minItems": 0,
"type": "array"
},
{ "type": "null" }
]
},
"name": {
"type": "string"
}
},
"required": [
"name"
],
"type": "object"
},
"distributed": {
"properties": {
"requestPlane": {
"properties": {
"etcdUrl": {
"type": "string"
},
"natsUrl": {
"type": "string"
},
"timeout": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
],
"required": [
"etcdUrl",
"natsUrl"
]
},
"workerCount": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"kubernetes": {
"properties": {
"checks": {
"properties": {
"liveness": {
"properties": {
"enabled": {
"oneOf": [
{ "type": "boolean" },
{ "type": "null" }
]
},
"failureThreshold": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"initialDelaySeconds": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"periodSeconds": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"successThreshold": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"readiness": {
"properties": {
"enabled": {
"oneOf": [
{ "type": "boolean" },
{ "type": "null" }
]
},
"failureThreshold": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"initialDelaySeconds": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"periodSeconds": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"successThreshold": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"labels": {
  "oneOf": [
    {
      "additionalProperties": { "type": "string" },
      "type": "object"
    },
    { "type": "null" }
  ]
},
"terminationGracePeriod": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"tolerations": {
"oneOf": [
{
"items": { "type": "object" },
"minItems": 0,
"type": "array"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"model": {
"properties": {
"instance": {
"properties": {
"count": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"parallelism": {
"properties": {
"pipeline": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"tensor": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"repository": {
"properties": {
"path": {
"oneOf": [
{ "type": "string" },
{ "type": "null" }
]
},
"volumeMounts": {
"oneOf": [
{
"items": {
"properties": {
"name": {
"$ref": "#/$defs/kubernetes_label"
},
"path": {
"oneOf": [
{ "type": "string" },
{ "type": "null" }
]
},
"persistentVolumeClaim": {
"$ref": "#/$defs/kubernetes_label"
}
},
"type": "object",
"required": [
"name",
"persistentVolumeClaim"
]
},
"minItems": 0,
"type": "array"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"ports": {
  "properties": {
    "api": {
      "oneOf": [
        {
          "maximum": 65535,
          "minimum": 1,
          "type": "integer"
        },
        { "type": "null" }
      ]
    },
    "data": {
      "oneOf": [
        { "$ref": "#/$defs/container_port" },
        { "type": "null" }
      ]
    },
    "health": {
      "oneOf": [
        { "$ref": "#/$defs/container_port" },
        { "type": "null" }
      ]
    },
    "metrics": {
      "oneOf": [
        { "$ref": "#/$defs/container_port" },
        { "type": "null" }
      ]
    },
    "request": {
      "oneOf": [
        { "$ref": "#/$defs/container_port" },
        { "type": "null" }
      ]
    }
  },
  "oneOf": [
    { "type": "object" },
    { "type": "null" }
  ]
},
"resources": {
"properties": {
"cpu": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type": "null" }
]
},
"ephemeral": {
"oneOf": [
{ "$ref": "#/$defs/kubernetes_units" },
{ "type": "null" }
]
},
"gpu": {
"properties": {
"count": {
"oneOf": [
{
"minimum": 1,
"type": "integer"
},
{ "type" : "null" }
]
},
"product": {
"oneOf": [
{
"items": {
"$ref": "#/$defs/kubernetes_label"
},
"type": "array"
},
{ "type": "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
},
"memory": {
"oneOf": [
{ "$ref": "#/$defs/kubernetes_units" },
{ "type" : "null" }
]
},
"sharedMemory": {
"oneOf": [
{ "$ref": "#/$defs/kubernetes_units" },
{ "type" : "null" }
]
}
},
"oneOf": [
{ "type": "object" },
{ "type": "null" }
]
}
},
"required": [
"component",
"distributed",
"image"
],
"type": "object",
"$defs": {
"container_port": {
"maximum": 65535,
"minimum": 1025,
"type": "integer"
},
"kubernetes_label": {
"pattern": "^[a-z0-9]([a-z0-9_\\-\\/\\.]{0,61}[a-z0-9])?$",
"type": "string"
},
"kubernetes_units": {
"pattern": "^\\d+[GKMgkm]i$",
"type": "string"
},
"service_port": {
"maximum": 32767,
"minimum": 30000,
"type": "integer"
}
}
}
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# `component` contains configuration options related to the Distributed Neural Models Component.
component:
  # `component.name` is the name of the Distributed Neural Models (dynamo) Component in the distributed deployment.
  name: # (required)
  # `component.namespace` is the Distributed Neural Models namespace in which the Distributed Neural Models Component will be deployed.
  namespace: # (default: "default")
# `image` contains configuration options related to the Distributed Neural Models Component container image.
image: # (required)
  # `image.pullSecrets` is an optional list of pull secrets to be used when downloading the Distributed Neural Models Component container image.
  pullSecrets: [] # (optional)
  # - name: pull-secret-name
  # `image.name` is the name of the container image containing the version of Distributed Neural Models Component container image to be used.
  # name: # (required)
# `distributed` contains configuration options related to organization of Distributed Neural Models workflows.
distributed: # (required)
  # `distributed.requestPlane` contains configuration options related to connecting the Distributed Neural Models Component to its Distributed Neural Models Request Plane.
  requestPlane:
    # `distributed.requestPlane.etcdUrl` is the URL of the etcd server used by the Distributed Neural Models Request Plane.
    etcdUrl: # (required)
    # `distributed.requestPlane.natsUrl` is the URL of the NATS server used by the Distributed Neural Models Request Plane.
    natsUrl: # (required)
    # `distributed.requestPlane.timeout` is the maximum time in seconds the Distributed Neural Models Component will wait for a response from the Distributed Neural Models Request Plane.
    timeout: # (default 60)
  # `distributed.workerCount` is the number of worker instances to be deployed as part of the Distributed Neural Models Component.
  workerCount: # (default 1)
# `model` contains configuration options related to the model(s) loaded by the Distributed Neural Models Component.
model:
  # `model.instance` contains optional configuration options related to how many Distributed Neural Models Component pods are deployed.
  instance:
    # `model.instance.count` is the number of worker instances (whole model) to be deployed as part of this helm chart.
    count: # (default 1)
    # `model.instance.parallelism` contains optional configuration options related to how work for a single model is spread across multiple pods.
    # When the product of `pipeline`*`tensor` is greater than 1, multiple pods will be deployed as a single logical worker.
    parallelism:
      # `model.instance.parallelism.pipeline` specifies the level of pipeline parallelism used by the model hosted by the Distributed Neural Models Component.
      # Pipeline parallelism involves sharding the model (vertically) into chunks, where each chunk comprises a subset of layers that is executed on a separate device.
      pipeline: # (default 1)
      # `model.instance.parallelism.tensor` specifies the level of tensor parallelism used by the model hosted by the Distributed Neural Models Component.
      # Tensor parallelism involves sharding (horizontally) individual layers of the model into smaller, independent blocks of computation that can be executed on different devices.
      tensor: # (default 1)
  # `model.repository` contains configuration options related to the model repository used by the Distributed Neural Models Component to load model(s).
  repository: # (optional)
    # `model.repository.path` is a local file-system path within the container to the model repository.
    # When `persistentVolumeClaim` is specified, this is the path to which the PVC will be mounted.
    path: # (default: /var/run/models)
    # `model.repository.volumeMounts` are persistent volumes (PV) to be mounted with the Distributed Neural Models Component container.
    volumeMounts: [] # (optional)
    # # `model.repository.volumeMounts.name` is the name to associate the volume mount with. Volume mount names must be unique and cannot contain spaces or special characters.
    # - name: # (required)
    #   # `model.repository.volumeMounts.path` is the file-system path, relative to the model repository's root path, to which the volume will be mounted.
    #   # When not provided, the volume is mounted to the root of the repository.
    #   # Overlapping mount paths can cause errors during container deployment.
    #   path: # (optional)
    #   # `model.repository.volumeMounts.persistentVolumeClaim` is the name of the persistent volume claim (PVC) used to mount a folder containing the model(s) Triton will load.
    #   persistentVolumeClaim: # (required)
# `ports` contains configuration options for the ports exposed by the Distributed Neural Models Component.
ports: # (optional)
  # `ports.health` is the container port exposed to enable Distributed Neural Models Component Kubernetes health reporting.
  health: # (default 8000)
  # `ports.metrics` is the container port exposed to enable Distributed Neural Models Component metrics reporting.
  metrics: # (default 9347)
  # `ports.request` is the container port exposed to enable Distributed Neural Models Component request-plane operations.
  request: # (default 9345)
# `resources` contains configuration options related to the resources assigned to Distributed Neural Models Component and loaded model(s).
resources: # (optional)
  # `resources.cpu` is the number of logical CPU cores required by the Distributed Neural Models Component and loaded model(s).
  cpu: # (default: 4)
  # `resources.ephemeral` is the ephemeral storage (aka local disk usage) allowance.
  # Ephemeral storage MUST include any shared memory allocated to Distributed Neural Models Component.
  # Value must be provided in Kubernetes' unit notation.
  ephemeral: # (default: 1Gi)
  # `resources.gpu` contains configuration options related to the GPU resources to be assigned to the Distributed Neural Models Component and loaded model(s).
  gpu: # (optional)
    # `resources.gpu.count` specifies the number of GPUs required by the Distributed Neural Models Component and loaded model(s).
    count: # (default: 1)
    # `resources.gpu.product` defines the list of supported GPUs to which Distributed Neural Models Component instance(s) can be deployed.
    # Value must match the node's `.metadata.labels.nvidia.com/gpu.product` label provided by the NVIDIA GPU Discovery Service.
    # Run 'kubectl get nodes' to find node names.
    # Run 'kubectl describe node <node_name>' to inspect a node's labels.
    product: [] # (optional)
  # `resources.memory` specifies the amount of CPU visible (aka host) memory available to the Distributed Neural Models Component and loaded model(s).
  # This value must include any shared memory allocated (via `resources.sharedMemory`) to Distributed Neural Models Component.
  # Value must be provided in Kubernetes' unit notation.
  memory: # (default: 16Gi)
  # `resources.sharedMemory` specifies the amount of shared CPU visible (aka host) memory available to the Distributed Neural Models Component and loaded model(s).
  # Value must be provided in Kubernetes' unit notation.
  sharedMemory: # (default: 512Mi)
# `kubernetes` contains configuration options related to the Kubernetes objects created by the chart.
kubernetes: # (optional)
  # `kubernetes.annotations` is an optional set of annotations to be applied to created Kubernetes objects.
  annotations: [] # (optional)
  # `kubernetes.checks` are optional configuration options controlling how the cluster monitors the health of Distributed Neural Models Component deployment(s).
  checks:
    # `kubernetes.checks.liveness` are configuration options related to how the cluster determines that a Distributed Neural Models Component instance is "alive" and responsive.
    liveness:
      # `kubernetes.checks.liveness.enabled` when `true`, instructs the cluster to actively determine if the pod is alive; otherwise the cluster will always assume the pod is alive.
      enabled: # (default true)
      # `kubernetes.checks.liveness.failureThreshold` is the number of failed responses required to determine a pod is no longer responsive (aka not "alive").
      failureThreshold: # (default 15)
      # `kubernetes.checks.liveness.initialDelaySeconds` is the minimum wait before the cluster begins to attempt to determine the health of the pod.
      initialDelaySeconds: # (default 10)
      # `kubernetes.checks.liveness.periodSeconds` is the minimum period between attempts to determine the health of the pod.
      periodSeconds: # (default 2)
      # `kubernetes.checks.liveness.successThreshold` is the number of successful responses required to determine that a pod is healthy.
      successThreshold: # (default 1)
    # `kubernetes.checks.readiness` contains configuration options related to how the cluster determines that a Distributed Neural Models Component instance is ready.
    readiness:
      # `kubernetes.checks.readiness.enabled` when `true`, instructs the cluster to actively determine if the pod is ready; otherwise the cluster will always assume the pod is ready.
      enabled: # (default true)
      # `kubernetes.checks.readiness.failureThreshold` is the number of failed responses required to determine a pod is not "ready".
      failureThreshold: # (default 15)
      # `kubernetes.checks.readiness.initialDelaySeconds` is the minimum wait before the cluster begins to attempt to determine the readiness of the pod.
      initialDelaySeconds: # (default 10)
      # `kubernetes.checks.readiness.periodSeconds` is the minimum period between attempts to determine the readiness of the pod.
      periodSeconds: # (default 2)
      # `kubernetes.checks.readiness.successThreshold` is the number of successful responses required to determine that a pod is ready.
      successThreshold: # (default 1)
  # `kubernetes.labels` is an optional set of labels to be applied to created Kubernetes objects.
  # These labels can be used for association with a preexisting service object.
  labels: [] # (optional)
  # `kubernetes.partOf` is an optional value to be used with the `app.kubernetes.io/part-of` label on created Kubernetes objects.
  partOf: # (default: nova-distributed)
  # `kubernetes.terminationGracePeriod` is the duration in seconds the cluster will wait for a Distributed Neural Models Component instance to gracefully terminate.
  terminationGracePeriod: # (default 30)
  # `kubernetes.tolerations` are tolerations applied to every pod deployed as part of this deployment.
  # Template already includes `nvidia.com/gpu=present:NoSchedule` when `resources.gpu` is specified.
  tolerations: [] # (optional)
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Values fixture that sets only `component.name`, the request-plane URLs, and `image.name`.
# NOTE(review): looks like the minimal valid input for the chart — confirm against the test that loads it.
component:
  name: test_dynamo_chart
distributed:
  requestPlane:
    etcdUrl: etcd:2379
    natsUrl: nats://nats:4222
image:
  name: test_dynamo_image-name
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): every value below looks deliberately malformed (out of range, wrong type,
# or non-positive), so this appears to be a negative fixture for values validation —
# confirm against the test that loads it.
ports:
  health: 80000 # larger than the highest TCP port number (65535)
  metrics: "alive" # string where a port number is expected
  request: -3000 # negative port number
resources:
  cpu: -4 # negative core count
kubernetes:
  checks:
    liveness:
      enabled: true
      failureThreshold: -1 # negative count
      initialDelaySeconds: 5.5 # fractional number of seconds
      periodSeconds: "10s" # string with a unit suffix rather than a bare number
      successThreshold: 0 # zero
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Values fixture that requests two GPUs and restricts the GPU product list to a single entry.
resources:
  gpu:
    count: 2
    product:
      - "a10g"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment