"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "c8845b412005ee6e1b1d234fbee31e4d33390abf"
Unverified commit c8770464, authored by hhzhang16 and committed by GitHub
Browse files

feat: normalize dynamo namespace computation (#5231)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent abd4b5d9
......@@ -13,7 +13,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: disagg-router-6p-2d
envs:
- name: HF_HOME
value: /home/dynamo/.cache/huggingface
......@@ -38,7 +37,6 @@ spec:
subComponentType: null
VllmDecodeWorker:
componentType: worker
dynamoNamespace: disagg-router-6p-2d
envFromSecret: hf-token-secret
extraPodSpec:
mainContainer:
......@@ -86,7 +84,6 @@ spec:
useAsCompilationCache: true
VllmPrefillWorker:
componentType: worker
dynamoNamespace: disagg-router-6p-2d
envFromSecret: hf-token-secret
extraPodMetadata:
annotations:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-moe-agg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -16,7 +15,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-moe-agg
componentType: worker
replicas: 1
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-moe-disagg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -17,7 +16,6 @@ spec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-moe-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -83,7 +81,6 @@ spec:
- --no-enable-prefix-caching
- --enforce-eager
VllmPrefillWorker:
dynamoNamespace: vllm-moe-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg
componentType: main
replicas: 1
livenessProbe:
......@@ -46,7 +45,6 @@ spec:
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
dynamoNamespace: vllm-agg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: main
replicas: 1
livenessProbe:
......@@ -46,7 +45,6 @@ spec:
args:
- "python3 -m dynamo.frontend --http-port 8000 --kv-cache-block-size 128"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......@@ -94,7 +92,6 @@ spec:
args:
- "python3 -m dynamo.vllm --model nvidia/Llama-3.1-8B-Instruct-FP8 --no-enable-prefix-caching --block-size 128 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: main
replicas: 1
livenessProbe:
......@@ -46,7 +45,6 @@ spec:
args:
- "python3 -m dynamo.frontend --http-port 8000 --kv-cache-block-size 128"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -94,7 +92,6 @@ spec:
args:
- "python3 -m dynamo.vllm --model nvidia/Llama-3.1-8B-Instruct-FP8 --no-enable-prefix-caching --block-size 128 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 3
......
......@@ -11,7 +11,6 @@ spec:
value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:8000"]}]},{"job_name":"frontend","static_configs":[{"targets":["vllm-disagg-planner-frontend:8000"]}]}]}}'
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
componentType: main
replicas: 1
livenessProbe:
......@@ -49,7 +48,6 @@ spec:
args:
- "python3 -m dynamo.frontend --http-port 8000 --kv-cache-block-size 128 --router-mode kv --kv-overlap-score-weight 0.0 --router-temperature 0.0 --no-kv-events"
Planner:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
......@@ -94,7 +92,6 @@ spec:
--prometheus-port=9085
--no-correction
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -149,7 +146,6 @@ spec:
- --block-size
- "128"
VllmPrefillWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: main
replicas: 1
livenessProbe:
......@@ -46,7 +45,6 @@ spec:
args:
- "python3 -m dynamo.frontend --http-port 8000 --kv-cache-block-size 128"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -94,7 +92,6 @@ spec:
args:
- "python3 -m dynamo.vllm --model nvidia/Llama-3.1-8B-Instruct-FP8 --no-enable-prefix-caching --block-size 128 --tensor-parallel-size 2 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: main
replicas: 1
livenessProbe:
......@@ -46,7 +45,6 @@ spec:
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -94,7 +92,6 @@ spec:
args:
- "python3 -m dynamo.vllm --model nvidia/Llama-3.1-8B-Instruct-FP8 2>&1 | tee /tmp/vllm.log"
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment