Unverified commit 9e8f67ed, authored by Harrison Saturley-Hall, committed by GitHub
Browse files

fix: update the tags for consistency and remove 0.4.1 refs (#3058)


Signed-off-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
Signed-off-by: Harrison Saturley-Hall <hsaturleyhal@nvidia.com>
parent 158435cd
......@@ -17,7 +17,7 @@ spec:
fsGroup: 1000
containers:
- name: benchmark-runner
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.5.0
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
securityContext:
allowPrivilegeEscalation: false
capabilities:
......
......@@ -18,7 +18,7 @@ spec:
- name: nvcr-imagepullsecret
containers:
- name: nixl-benchmark
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:nixlbench-e42c07a8
image: my-registry/vllm-runtime:nixlbench-e42c07a8
command: ["sh", "-c"]
args:
- "nixlbench -etcd_endpoints http://dynamo-platform-etcd:2379 --target_seg_type VRAM --initiator_seg_type VRAM && sleep infinity"
......
......@@ -130,7 +130,7 @@ uv pip install --prerelease=allow sglang[all]==0.4.9.post6
<summary>Instructions</summary>
```bash
docker pull nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.3.2
docker pull nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag
```
</details>
......
......@@ -92,7 +92,7 @@ Edit the template to match your environment:
```yaml
# Update image registry and tag
image: your-registry/sglang-runtime:your-tag
image: my-registry/sglang-runtime:my-tag
# Configure your model
args:
......
......@@ -18,7 +18,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
image: my-registry/sglang-runtime:my-tag
Planner:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
......@@ -49,7 +49,7 @@ spec:
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
command:
- /bin/sh
......@@ -89,7 +89,7 @@ spec:
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- /bin/sh
......@@ -106,7 +106,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- python3
......@@ -137,7 +137,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command:
- python3
......
......@@ -89,7 +89,7 @@ resources:
```yaml
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
args:
- "python3"
......@@ -109,7 +109,7 @@ Before using these templates, ensure you have:
### Container Images
The deployment files currently require access to `nvcr.io/nvidian/nim-llm-dev/trtllm-runtime`. If you don't have access, build and push your own image:
The deployment files currently require access to `my-registry/trtllm-runtime`. If you don't have access, build and push your own image:
```bash
./container/build.sh --framework tensorrtllm
......@@ -141,7 +141,7 @@ Edit the template to match your environment:
```yaml
# Update image registry and tag
image: your-registry/trtllm-runtime:your-tag
image: my-registry/trtllm-runtime:my-tag
# Configure your model and deployment settings
args:
......
......@@ -34,7 +34,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
......@@ -50,7 +50,7 @@ spec:
configMap:
name: nvidia-config
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
# mount the configmap as a volume
volumeMounts:
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
......@@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
......@@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
......@@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......@@ -41,7 +41,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......
......@@ -18,7 +18,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- python3
......@@ -69,7 +69,7 @@ spec:
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
ports:
- name: metrics
......@@ -114,7 +114,7 @@ spec:
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- python3
......@@ -152,7 +152,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- python3
......@@ -186,7 +186,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- python3
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
......@@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......@@ -44,7 +44,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
image: my-registry/trtllm-runtime:my-tag
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
......
......@@ -116,7 +116,7 @@ Edit the template to match your environment:
```yaml
# Update image registry and tag
image: your-registry/vllm-runtime:your-tag
image: my-registry/vllm-runtime:my-tag
# Configure your model
args:
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
......@@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
......@@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
......@@ -24,7 +24,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -41,7 +41,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -20,7 +20,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
Planner:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
......@@ -51,7 +51,7 @@ spec:
mountPoint: /data
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/planner/src/dynamo/planner
command:
- /bin/sh
......@@ -91,7 +91,7 @@ spec:
failureThreshold: 10
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -114,7 +114,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- python3
......@@ -139,7 +139,7 @@ spec:
port: 9090
periodSeconds: 10
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- python3
......
......@@ -13,7 +13,7 @@ spec:
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
envs:
- name: DYN_ROUTER_MODE
value: kv
......@@ -27,7 +27,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......@@ -44,7 +44,7 @@ spec:
gpu: "1"
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
......
......@@ -73,7 +73,7 @@ eppAware:
# Container name for the sidecar
name: frontend-router
# Sidecar image
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.4.1
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
# Image pull policy for the sidecar
imagePullPolicy: IfNotPresent
# Command and args for running the frontend in router mode.
......
......@@ -10,7 +10,7 @@ Install a pre-built wheel from PyPI.
source venv/bin/activate
# Install Dynamo from PyPI (choose one backend extra)
uv pip install "ai-dynamo[sglang]==0.4.1" # or [vllm], [trtllm]
uv pip install "ai-dynamo[sglang]==my-tag" # or [vllm], [trtllm]
Pip from source
......@@ -41,4 +41,4 @@ Pull and run prebuilt images from NVIDIA NGC (`nvcr.io`).
docker run --rm -it \
--gpus all \
--network host \
nvcr.io/nvidia/ai-dynamo/sglang-runtime:0.4.1 # or vllm, tensorrtllm
nvcr.io/nvidia/ai-dynamo/sglang-runtime:my-tag # or vllm, tensorrtllm
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment