Unverified commit 9e8f67ed, authored by Harrison Saturley-Hall, committed by GitHub
Browse files

fix: update the tags for consistency and remove 0.4.1 refs (#3058)


Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
parent 158435cd
......@@ -398,7 +398,7 @@ The benchmark job is configured directly in the YAML file.
- **Model**: `Qwen/Qwen3-0.6B`
- **Service**: `qwen-vllm-agg=vllm-agg-frontend:8000`
- **Docker Image**: `nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0`
- **Docker Image**: `nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag`
### Customizing the Job
......
......@@ -151,7 +151,7 @@ spec:
1. **Set the container image:**
```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken)
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
```
2. **Set the config path for the profiling job:**
......
......@@ -38,7 +38,7 @@ spec:
memory: "2Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -95,7 +95,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
volumeMounts:
- name: local-model-cache
mountPath: /root/.cache
......
......@@ -3,7 +3,7 @@
"containerDefinitions": [
{
"name": "dynamo-vllm-frontend",
"image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0",
"image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag",
"repositoryCredentials": {
"credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_nvcr_access"
},
......
......@@ -3,7 +3,7 @@
"containerDefinitions": [
{
"name": "dynamo-prefill",
"image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0",
"image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag",
"repositoryCredentials": {
"credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_access"
},
......
......@@ -20,7 +20,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
image: my-registry/sglang-wideep-runtime:my-tag
decode:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker
......@@ -45,7 +45,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
......@@ -89,7 +89,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
......
......@@ -20,7 +20,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
image: my-registry/sglang-wideep-runtime:my-tag
decode:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker
......@@ -43,7 +43,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
......@@ -84,7 +84,7 @@ spec:
periodSeconds: 10
timeoutSeconds: 1800
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01
image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
......
......@@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:aiperf-0637181
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
env:
- name: TARGET_MODEL
......
......@@ -56,7 +56,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:gpt-oss-dynamo-nvl72-debug-trtllm-tot
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
VllmPrefillWorker:
......@@ -36,7 +36,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
VllmPrefillWorker:
......@@ -36,7 +36,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
......@@ -61,7 +61,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
VllmPrefillWorker:
......@@ -46,7 +46,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 2
resources:
......@@ -81,7 +81,7 @@ spec:
command:
- /bin/sh
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
replicas: 1
resources:
......
......@@ -16,7 +16,7 @@ spec:
restartPolicy: Never
containers:
- name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -38,7 +38,7 @@ spec:
memory: "100Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0825-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -88,7 +88,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0825-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -38,7 +38,7 @@ spec:
memory: "100Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -88,7 +88,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -138,7 +138,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -38,7 +38,7 @@ spec:
memory: "100Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -88,7 +88,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -138,7 +138,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -43,7 +43,7 @@ spec:
memory: "100Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -79,7 +79,7 @@ spec:
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
ports:
- name: metrics
......@@ -128,7 +128,7 @@ spec:
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -179,7 +179,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- python3
......@@ -235,7 +235,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- python3
......
......@@ -38,7 +38,7 @@ spec:
memory: "100Gi"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -88,7 +88,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -138,7 +138,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01
image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment