Unverified Commit a4eb4e8a authored by Harrison Saturley-Hall, committed by GitHub
Browse files

fix: misnamed tensorrtllm-runtime image and incorrect tag (#4289)


Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <harrison.saturley.hall@gmail.com>
Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
parent dce20d06
......@@ -12,7 +12,7 @@ spec:
# ProfilingConfig maps directly to the profile_sla.py config format
profilingConfig:
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
config:
# Sweep/profiling configuration
sweep:
......@@ -31,7 +31,7 @@ spec:
# Deployment overrides for the auto-created DGD
deploymentOverrides:
workersImage: "nvcr.io/nvidian/dynamo-dev/trtllm-runtime:dep-554.0"
workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag"
# Automatically create DynamoGraphDeployment after profiling
autoApply: true
......@@ -12,7 +12,7 @@ spec:
# ProfilingConfig maps directly to the profile_sla.py config format
profilingConfig:
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
config:
# Sweep/profiling configuration
sweep:
......@@ -28,7 +28,7 @@ spec:
# Deployment overrides for the auto-created DGD
deploymentOverrides:
workersImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0"
workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
# Automatically create DynamoGraphDeployment after profiling
autoApply: true
......@@ -25,7 +25,7 @@ spec:
backend: trtllm
# ProfilerImage is the container image to use for profiling jobs (required)
profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
# ProfilingConfig maps directly to the profile_sla.py config format
# See benchmarks/profiler/utils/profiler_argparse.py for complete schema
......
......@@ -392,7 +392,7 @@ spec:
backend: trtllm
profilingConfig:
profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
config:
sla:
isl: 4000
......@@ -409,7 +409,7 @@ spec:
backend_version: "0.20.0"
deploymentOverrides:
workersImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
autoApply: true
```
......@@ -493,7 +493,7 @@ AssertionError: num_heads <N> should be divisible by tp_size <M> and the divisio
```yaml
profilingConfig:
profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1"
profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
config:
hardware:
max_num_gpus_per_engine: 4 # For Qwen3-0.6B (16 heads / 4 = max TP of 4)
......
......@@ -89,7 +89,7 @@ resources:
```yaml
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/examples/backends/trtllm
args:
- "python3"
......@@ -109,7 +109,7 @@ Before using these templates, ensure you have:
### Container Images
The deployment files currently require access to `my-registry/trtllm-runtime`. If you don't have access, build and push your own image:
The deployment files currently require access to `my-registry/tensorrtllm-runtime`. If you don't have access, build and push your own image:
```bash
./container/build.sh --framework tensorrtllm
......@@ -141,7 +141,7 @@ Edit the template to match your environment:
```yaml
# Update image registry and tag
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
# Configure your model and deployment settings
args:
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
......@@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
......@@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......
......@@ -95,7 +95,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/examples/backends/trtllm
command:
- python3
......@@ -127,7 +127,7 @@ spec:
- name: nvidia-config
mountPath: /workspace/
readOnly: true
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......@@ -165,7 +165,7 @@ spec:
- name: nvidia-config
mountPath: /workspace/
readOnly: true
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
......@@ -25,7 +25,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......@@ -51,7 +51,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/examples/backends/trtllm
command:
- python3
......@@ -38,7 +38,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/components/src/dynamo/planner
ports:
- name: metrics
......@@ -89,7 +89,7 @@ spec:
extraPodSpec:
terminationGracePeriodSeconds: 600
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......@@ -116,7 +116,7 @@ spec:
extraPodSpec:
terminationGracePeriodSeconds: 600
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
......@@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......@@ -53,7 +53,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/
command:
- python3
......
......@@ -74,7 +74,7 @@ Ensure your Kubernetes cluster has:
Ensure access to NVIDIA container registry for runtime images:
- `nvcr.io/nvidia/ai-dynamo/vllm-runtime:x.y.z`
- `nvcr.io/nvidia/ai-dynamo/trtllm-runtime:x.y.z`
- `nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:x.y.z`
- `nvcr.io/nvidia/ai-dynamo/sglang-runtime:x.y.z`
### 5. HuggingFace Access and Kubernetes Secret Creation
......
......@@ -46,7 +46,7 @@ spec:
command:
- /bin/sh
- -c
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
replicas: 1
TrtllmWorker:
componentType: main
......@@ -81,7 +81,7 @@ spec:
command:
- /bin/sh
- -c
image: my-registry/trtllm-runtime:my-tag
image: my-registry/tensorrtllm-runtime:my-tag
env:
- name: TRTLLM_ENABLE_PDL
value: "1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment