Unverified Commit a4eb4e8a authored by Harrison Saturley-Hall, committed by GitHub
Browse files

fix: misnamed tensorrtllm-runtime image and incorrect tag (#4289)


Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <harrison.saturley.hall@gmail.com>
Co-authored-by: hhzhang16 <54051230+hhzhang16@users.noreply.github.com>
parent dce20d06
...@@ -12,7 +12,7 @@ spec: ...@@ -12,7 +12,7 @@ spec:
# ProfilingConfig maps directly to the profile_sla.py config format # ProfilingConfig maps directly to the profile_sla.py config format
profilingConfig: profilingConfig:
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0" profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
config: config:
# Sweep/profiling configuration # Sweep/profiling configuration
sweep: sweep:
...@@ -31,7 +31,7 @@ spec: ...@@ -31,7 +31,7 @@ spec:
# Deployment overrides for the auto-created DGD # Deployment overrides for the auto-created DGD
deploymentOverrides: deploymentOverrides:
workersImage: "nvcr.io/nvidian/dynamo-dev/trtllm-runtime:dep-554.0" workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag"
# Automatically create DynamoGraphDeployment after profiling # Automatically create DynamoGraphDeployment after profiling
autoApply: true autoApply: true
...@@ -12,7 +12,7 @@ spec: ...@@ -12,7 +12,7 @@ spec:
# ProfilingConfig maps directly to the profile_sla.py config format # ProfilingConfig maps directly to the profile_sla.py config format
profilingConfig: profilingConfig:
profilerImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0" profilerImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
config: config:
# Sweep/profiling configuration # Sweep/profiling configuration
sweep: sweep:
...@@ -28,7 +28,7 @@ spec: ...@@ -28,7 +28,7 @@ spec:
# Deployment overrides for the auto-created DGD # Deployment overrides for the auto-created DGD
deploymentOverrides: deploymentOverrides:
workersImage: "nvcr.io/nvidian/dynamo-dev/vllm-runtime:dep-554.0" workersImage: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag"
# Automatically create DynamoGraphDeployment after profiling # Automatically create DynamoGraphDeployment after profiling
autoApply: true autoApply: true
...@@ -25,7 +25,7 @@ spec: ...@@ -25,7 +25,7 @@ spec:
backend: trtllm backend: trtllm
# ProfilerImage is the container image to use for profiling jobs (required) # ProfilerImage is the container image to use for profiling jobs (required)
profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1" profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
# ProfilingConfig maps directly to the profile_sla.py config format # ProfilingConfig maps directly to the profile_sla.py config format
# See benchmarks/profiler/utils/profiler_argparse.py for complete schema # See benchmarks/profiler/utils/profiler_argparse.py for complete schema
......
...@@ -392,7 +392,7 @@ spec: ...@@ -392,7 +392,7 @@ spec:
backend: trtllm backend: trtllm
profilingConfig: profilingConfig:
profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1" profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
config: config:
sla: sla:
isl: 4000 isl: 4000
...@@ -409,7 +409,7 @@ spec: ...@@ -409,7 +409,7 @@ spec:
backend_version: "0.20.0" backend_version: "0.20.0"
deploymentOverrides: deploymentOverrides:
workersImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1" workersImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
autoApply: true autoApply: true
``` ```
...@@ -493,7 +493,7 @@ AssertionError: num_heads <N> should be divisible by tp_size <M> and the divisio ...@@ -493,7 +493,7 @@ AssertionError: num_heads <N> should be divisible by tp_size <M> and the divisio
```yaml ```yaml
profilingConfig: profilingConfig:
profilerImage: "nvcr.io/nvidia/ai-dynamo/trtllm-runtime:0.6.1" profilerImage: "nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.6.1"
config: config:
hardware: hardware:
max_num_gpus_per_engine: 4 # For Qwen3-0.6B (16 heads / 4 = max TP of 4) max_num_gpus_per_engine: 4 # For Qwen3-0.6B (16 heads / 4 = max TP of 4)
......
...@@ -89,7 +89,7 @@ resources: ...@@ -89,7 +89,7 @@ resources:
```yaml ```yaml
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/examples/backends/trtllm workingDir: /workspace/examples/backends/trtllm
args: args:
- "python3" - "python3"
...@@ -109,7 +109,7 @@ Before using these templates, ensure you have: ...@@ -109,7 +109,7 @@ Before using these templates, ensure you have:
### Container Images ### Container Images
The deployment files currently require access to `my-registry/trtllm-runtime`. If you don't have access, build and push your own image: The deployment files currently require access to `my-registry/tensorrtllm-runtime`. If you don't have access, build and push your own image:
```bash ```bash
./container/build.sh --framework tensorrtllm ./container/build.sh --framework tensorrtllm
...@@ -141,7 +141,7 @@ Edit the template to match your environment: ...@@ -141,7 +141,7 @@ Edit the template to match your environment:
```yaml ```yaml
# Update image registry and tag # Update image registry and tag
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
# Configure your model and deployment settings # Configure your model and deployment settings
args: args:
......
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMWorker: TRTLLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg dynamoNamespace: trtllm-agg
...@@ -24,7 +24,7 @@ spec: ...@@ -24,7 +24,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
......
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
envs: envs:
- name: DYN_ROUTER_MODE - name: DYN_ROUTER_MODE
value: kv value: kv
...@@ -27,7 +27,7 @@ spec: ...@@ -27,7 +27,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
......
...@@ -95,7 +95,7 @@ spec: ...@@ -95,7 +95,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/examples/backends/trtllm workingDir: /workspace/examples/backends/trtllm
command: command:
- python3 - python3
...@@ -127,7 +127,7 @@ spec: ...@@ -127,7 +127,7 @@ spec:
- name: nvidia-config - name: nvidia-config
mountPath: /workspace/ mountPath: /workspace/
readOnly: true readOnly: true
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
...@@ -165,7 +165,7 @@ spec: ...@@ -165,7 +165,7 @@ spec:
- name: nvidia-config - name: nvidia-config
mountPath: /workspace/ mountPath: /workspace/
readOnly: true readOnly: true
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
......
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
...@@ -25,7 +25,7 @@ spec: ...@@ -25,7 +25,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
...@@ -51,7 +51,7 @@ spec: ...@@ -51,7 +51,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
......
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/examples/backends/trtllm workingDir: /workspace/examples/backends/trtllm
command: command:
- python3 - python3
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/components/src/dynamo/planner workingDir: /workspace/components/src/dynamo/planner
ports: ports:
- name: metrics - name: metrics
...@@ -89,7 +89,7 @@ spec: ...@@ -89,7 +89,7 @@ spec:
extraPodSpec: extraPodSpec:
terminationGracePeriodSeconds: 600 terminationGracePeriodSeconds: 600
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
...@@ -116,7 +116,7 @@ spec: ...@@ -116,7 +116,7 @@ spec:
extraPodSpec: extraPodSpec:
terminationGracePeriodSeconds: 600 terminationGracePeriodSeconds: 600
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
......
...@@ -13,7 +13,7 @@ spec: ...@@ -13,7 +13,7 @@ spec:
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
envs: envs:
- name: DYN_ROUTER_MODE - name: DYN_ROUTER_MODE
value: kv value: kv
...@@ -27,7 +27,7 @@ spec: ...@@ -27,7 +27,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
...@@ -53,7 +53,7 @@ spec: ...@@ -53,7 +53,7 @@ spec:
gpu: "1" gpu: "1"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
workingDir: /workspace/ workingDir: /workspace/
command: command:
- python3 - python3
......
...@@ -74,7 +74,7 @@ Ensure your Kubernetes cluster has: ...@@ -74,7 +74,7 @@ Ensure your Kubernetes cluster has:
Ensure access to NVIDIA container registry for runtime images: Ensure access to NVIDIA container registry for runtime images:
- `nvcr.io/nvidia/ai-dynamo/vllm-runtime:x.y.z` - `nvcr.io/nvidia/ai-dynamo/vllm-runtime:x.y.z`
- `nvcr.io/nvidia/ai-dynamo/trtllm-runtime:x.y.z` - `nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:x.y.z`
- `nvcr.io/nvidia/ai-dynamo/sglang-runtime:x.y.z` - `nvcr.io/nvidia/ai-dynamo/sglang-runtime:x.y.z`
### 5. HuggingFace Access and Kubernetes Secret Creation ### 5. HuggingFace Access and Kubernetes Secret Creation
......
...@@ -46,7 +46,7 @@ spec: ...@@ -46,7 +46,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: main componentType: main
...@@ -81,7 +81,7 @@ spec: ...@@ -81,7 +81,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: my-registry/trtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
env: env:
- name: TRTLLM_ENABLE_PDL - name: TRTLLM_ENABLE_PDL
value: "1" value: "1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment