Unverified commit 9e8f67ed, authored by Harrison Saturley-Hall, committed by GitHub
Browse files

fix: update the tags for consistency and remove 0.4.1 refs (#3058)


Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
parent 158435cd
...@@ -398,7 +398,7 @@ The benchmark job is configured directly in the YAML file. ...@@ -398,7 +398,7 @@ The benchmark job is configured directly in the YAML file.
- **Model**: `Qwen/Qwen3-0.6B` - **Model**: `Qwen/Qwen3-0.6B`
- **Service**: `qwen-vllm-agg=vllm-agg-frontend:8000` - **Service**: `qwen-vllm-agg=vllm-agg-frontend:8000`
- **Docker Image**: `nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0` - **Docker Image**: `nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag`
### Customizing the Job ### Customizing the Job
......
...@@ -151,7 +151,7 @@ spec: ...@@ -151,7 +151,7 @@ spec:
1. **Set the container image:** 1. **Set the container image:**
```bash ```bash
export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 # or any existing image tag (TODO: update to 0.5.0 upon release as profiling with 0.4.1 is broken) export DOCKER_IMAGE=nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
``` ```
2. **Set the config path for the profiling job:** 2. **Set the config path for the profiling job:**
......
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
memory: "2Gi" memory: "2Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -95,7 +95,7 @@ spec: ...@@ -95,7 +95,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
volumeMounts: volumeMounts:
- name: local-model-cache - name: local-model-cache
mountPath: /root/.cache mountPath: /root/.cache
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
"containerDefinitions": [ "containerDefinitions": [
{ {
"name": "dynamo-vllm-frontend", "name": "dynamo-vllm-frontend",
"image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0", "image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag",
"repositoryCredentials": { "repositoryCredentials": {
"credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_nvcr_access" "credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_nvcr_access"
}, },
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
"containerDefinitions": [ "containerDefinitions": [
{ {
"name": "dynamo-prefill", "name": "dynamo-prefill",
"image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.0", "image": "nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag",
"repositoryCredentials": { "repositoryCredentials": {
"credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_access" "credentialsParameter": "arn:aws:secretsmanager:us-east-2:AWS_ID:secret:ngc_access"
}, },
......
...@@ -20,7 +20,7 @@ spec: ...@@ -20,7 +20,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01 image: my-registry/sglang-wideep-runtime:my-tag
decode: decode:
dynamoNamespace: sgl-dsr1-16gpu dynamoNamespace: sgl-dsr1-16gpu
componentType: worker componentType: worker
...@@ -45,7 +45,7 @@ spec: ...@@ -45,7 +45,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01 image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - /bin/sh
...@@ -89,7 +89,7 @@ spec: ...@@ -89,7 +89,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01 image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - /bin/sh
......
...@@ -20,7 +20,7 @@ spec: ...@@ -20,7 +20,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01 image: my-registry/sglang-wideep-runtime:my-tag
decode: decode:
dynamoNamespace: sgl-dsr1-8gpu dynamoNamespace: sgl-dsr1-8gpu
componentType: worker componentType: worker
...@@ -43,7 +43,7 @@ spec: ...@@ -43,7 +43,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01 image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - /bin/sh
...@@ -84,7 +84,7 @@ spec: ...@@ -84,7 +84,7 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 1800 timeoutSeconds: 1800
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-sglang-wideep-runtime:hzhou-0917-01 image: my-registry/sglang-wideep-runtime:my-tag
workingDir: /workspace/components/backends/sglang workingDir: /workspace/components/backends/sglang
command: command:
- /bin/sh - /bin/sh
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
restartPolicy: Never restartPolicy: Never
containers: containers:
- name: perf - name: perf
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:aiperf-0637181 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
env: env:
- name: TARGET_MODEL - name: TARGET_MODEL
......
...@@ -56,7 +56,7 @@ spec: ...@@ -56,7 +56,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:gpt-oss-dynamo-nvl72-debug-trtllm-tot image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm workingDir: /workspace/components/backends/trtllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
...@@ -36,7 +36,7 @@ spec: ...@@ -36,7 +36,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
restartPolicy: Never restartPolicy: Never
containers: containers:
- name: perf - name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
...@@ -36,7 +36,7 @@ spec: ...@@ -36,7 +36,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
resources: resources:
...@@ -61,7 +61,7 @@ spec: ...@@ -61,7 +61,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
restartPolicy: Never restartPolicy: Never
containers: containers:
- name: perf - name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
mountPoint: /root/.cache/huggingface mountPoint: /root/.cache/huggingface
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
...@@ -46,7 +46,7 @@ spec: ...@@ -46,7 +46,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 2 replicas: 2
resources: resources:
...@@ -81,7 +81,7 @@ spec: ...@@ -81,7 +81,7 @@ spec:
command: command:
- /bin/sh - /bin/sh
- -c - -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
replicas: 1 replicas: 1
resources: resources:
......
...@@ -16,7 +16,7 @@ spec: ...@@ -16,7 +16,7 @@ spec:
restartPolicy: Never restartPolicy: Never
containers: containers:
- name: perf - name: perf
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1 image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
memory: "100Gi" memory: "100Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0825-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -88,7 +88,7 @@ spec: ...@@ -88,7 +88,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0825-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
memory: "100Gi" memory: "100Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -88,7 +88,7 @@ spec: ...@@ -88,7 +88,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -138,7 +138,7 @@ spec: ...@@ -138,7 +138,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
memory: "100Gi" memory: "100Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -88,7 +88,7 @@ spec: ...@@ -88,7 +88,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -138,7 +138,7 @@ spec: ...@@ -138,7 +138,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
...@@ -43,7 +43,7 @@ spec: ...@@ -43,7 +43,7 @@ spec:
memory: "100Gi" memory: "100Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -79,7 +79,7 @@ spec: ...@@ -79,7 +79,7 @@ spec:
failureThreshold: 10 failureThreshold: 10
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner workingDir: /workspace/components/planner/src/dynamo/planner
ports: ports:
- name: metrics - name: metrics
...@@ -128,7 +128,7 @@ spec: ...@@ -128,7 +128,7 @@ spec:
failureThreshold: 10 failureThreshold: 10
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -179,7 +179,7 @@ spec: ...@@ -179,7 +179,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- python3 - python3
...@@ -235,7 +235,7 @@ spec: ...@@ -235,7 +235,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- python3 - python3
......
...@@ -38,7 +38,7 @@ spec: ...@@ -38,7 +38,7 @@ spec:
memory: "100Gi" memory: "100Gi"
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -88,7 +88,7 @@ spec: ...@@ -88,7 +88,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
...@@ -138,7 +138,7 @@ spec: ...@@ -138,7 +138,7 @@ spec:
port: 9090 port: 9090
periodSeconds: 10 periodSeconds: 10
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0902-01 image: my-registry/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm workingDir: /workspace/components/backends/vllm
command: command:
- /bin/sh - /bin/sh
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment