# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for dynamo-gaie.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# Dynamo namespace where the dynamo model is deployed.
dynamoNamespace: "vllm-agg"

# Model identification settings.
model:
  # Model name that will be used to route traffic to the dynamo model.
  # For example, if the model name is Qwen/Qwen3-0.6B, then shortName
  # should be qwen.
  identifier: "Qwen/Qwen3-0.6B"
  # Short name of the model, used to generate the resource names.
  shortName: "qwen"
  # Criticality level for the inference model.
  criticality: "Critical"

# InferencePool configuration.
inferencePool:
  # Target port number for the inference pool.
  port: 8000

# HTTPRoute configuration.
httpRoute:
  # Enable the HTTPRoute.
  enabled: true
  # Gateway parent reference configuration.
  gatewayName: "inference-gateway"
  # Path matching configuration.
  path:
    prefix: "/"
  # Timeout configuration.
  timeout:
    request: "300s"

extension:
  # EPP image for the GAIE extension (Dynamo EPP image by default).
  image: ""  # leave empty to use defaults below
  standardImage: "us-central1-docker.pkg.dev/k8s-artifacts-prod/images/gateway-api-inference-extension/epp:v0.4.0"
  # NOTE(review): this default points at a staging registry path (nvstaging);
  # confirm that is intended for published charts.
  dynamoImage: "nvcr.io/nvstaging/ai-dynamo/gaie-epp-dynamo:v0.6.0-1"

# Generic knobs you may want in both modes.
# NOTE(review): imagePullSecrets and epp are assumed to be top-level keys;
# confirm against the chart templates.
imagePullSecrets:
  - "docker-imagepullsecret"

epp:
  imagePullPolicy: "IfNotPresent"
  # Add env in name/value pairs.
  extraEnv: []
  # If you ever want to completely override args, supply a list here.
  # When empty, chart will render sane defaults.
  argsOverride: []
  # Dynamo routing mode - set to true to enable KV-aware routing via the
  # Dynamo EPP image.
  # NOTE(review): useDynamo, configFile and dynamo are assumed to be nested
  # under epp; confirm against the chart templates.
  useDynamo: true
  # Dynamo-specific settings (only used when useDynamo: true).
  configFile: "/etc/epp/epp-config-dynamo.yaml"
  dynamo:
    # Required when useDynamo: true.
    # NOTE(review): presumably should match dynamoNamespace above - confirm.
    namespace: "vllm-agg"
    kvBlockSize: "16"

# Platform configuration (for Dynamo mode).
platformReleaseName: "dynamo-platform"
platformNamespace: "my-model"