Unverified commit c8770464, authored by hhzhang16 and committed by GitHub
Browse files

feat: normalize dynamo namespace computation (#5231)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent abd4b5d9
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg-router
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -19,7 +18,6 @@ spec:
value: kv
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router
componentType: worker
replicas: 4
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-v1-disagg-router
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -18,7 +17,6 @@ spec:
- name: DYN_ROUTER_MODE
value: kv
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -46,7 +44,6 @@ spec:
args:
- python3 -m dynamo.vllm --model meta-llama/Llama-3.1-70B-Instruct -tp 8
VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -29,7 +29,6 @@ spec:
periodSeconds: 10
timeoutSeconds: 2
failureThreshold: 3
dynamoNamespace: hello-world
componentType: frontend
replicas: 1
resources:
......@@ -68,7 +67,6 @@ spec:
periodSeconds: 60
timeoutSeconds: 30
failureThreshold: 10
dynamoNamespace: hello-world
componentType: worker
replicas: 1
resources:
......
......@@ -7,7 +7,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: sglang-disagg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -15,7 +14,6 @@ spec:
image: my-registry/sglang-runtime:my-tag
decode:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
componentType: worker
subComponentType: decode
replicas: 1
......@@ -38,7 +36,6 @@ spec:
exec python3 -m dynamo.sglang --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --page-size 16 --tp 1 --trust-remote-code --skip-tokenizer-init --disaggregation-mode decode --disaggregation-transfer-backend nixl --disaggregation-bootstrap-port --disaggregation-bootstrap-port "12345" --host "0.0.0.0"
prefill:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
componentType: worker
subComponentType: prefill
replicas: 1
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -39,7 +37,6 @@ spec:
/sbin/ldconfig
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -9,7 +9,6 @@ spec:
backendFramework: vllm
services:
Frontend:
dynamoNamespace: agg-llava
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -17,7 +16,6 @@ spec:
image: my-registry/vllm-runtime:my-tag
EncodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: agg-llava
componentType: worker
replicas: 1
resources:
......@@ -34,7 +32,6 @@ spec:
- python3 components/encode_worker.py --model llava-hf/llava-1.5-7b-hf
VLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: agg-llava
componentType: worker
replicas: 1
resources:
......@@ -51,7 +48,6 @@ spec:
- python3 components/worker.py --model llava-hf/llava-1.5-7b-hf --worker-type prefill
Processor:
envFromSecret: hf-token-secret
dynamoNamespace: agg-llava
componentType: worker
replicas: 1
resources:
......
......@@ -9,7 +9,6 @@ spec:
backendFramework: vllm
services:
Frontend:
dynamoNamespace: agg-qwen
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -17,7 +16,6 @@ spec:
image: my-registry/vllm-runtime:my-tag
EncodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: agg-qwen
componentType: worker
replicas: 1
resources:
......@@ -34,7 +32,6 @@ spec:
- python3 components/encode_worker.py --model Qwen/Qwen2.5-VL-7B-Instruct
VLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: agg-qwen
componentType: worker
replicas: 1
resources:
......@@ -51,7 +48,6 @@ spec:
- python3 components/worker.py --model Qwen/Qwen2.5-VL-7B-Instruct --worker-type prefill
Processor:
envFromSecret: hf-token-secret
dynamoNamespace: agg-qwen
componentType: worker
replicas: 1
resources:
......
......@@ -14,7 +14,6 @@ spec:
create: false
services:
Frontend:
dynamoNamespace: sgl-dsr1-16gpu
componentType: frontend
replicas: 1
volumeMounts:
......@@ -24,7 +23,6 @@ spec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
decode:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker
subComponentType: decode
replicas: 1
......@@ -70,7 +68,6 @@ spec:
- 0.0.0.0
- --prefill-round-robin-balance
prefill:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker
subComponentType: prefill
replicas: 1
......
......@@ -14,7 +14,6 @@ spec:
create: false
services:
Frontend:
dynamoNamespace: sgl-dsr1-8gpu
componentType: frontend
replicas: 1
volumeMounts:
......@@ -24,7 +23,6 @@ spec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
decode:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker
subComponentType: decode
replicas: 1
......@@ -67,7 +65,6 @@ spec:
- 0.0.0.0
- --prefill-round-robin-balance
prefill:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker
subComponentType: prefill
replicas: 1
......
......@@ -122,7 +122,6 @@ spec:
backendFramework: trtllm
services:
Frontend:
dynamoNamespace: trtllm-disagg-multinode
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -137,7 +136,6 @@ spec:
- /bin/sh
- -c
prefill:
dynamoNamespace: trtllm-disagg-multinode
componentType: worker
subComponentType: prefill
replicas: 1
......@@ -195,7 +193,6 @@ spec:
configMap:
name: prefill-config
decode:
dynamoNamespace: trtllm-disagg-multinode
componentType: worker
subComponentType: decode
replicas: 1
......
......@@ -12,7 +12,6 @@ spec:
create: false
services:
Frontend:
dynamoNamespace: vllm-dsr1
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -26,7 +25,6 @@ spec:
failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
decode:
dynamoNamespace: vllm-dsr1
componentType: worker
subComponentType: decode
replicas: 1
......@@ -92,7 +90,6 @@ spec:
--max-num-seqs 512 \
--compilation_config '{"pass_config":{"enable_fusion":true,"enable_attn_fusion":true,"enable_noop":true},"custom_ops":["+rms_norm"],"cudagraph_mode":"FULL_DECODE_ONLY"}'
prefill:
dynamoNamespace: vllm-dsr1
componentType: worker
subComponentType: prefill
replicas: 1
......
......@@ -28,7 +28,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: gpt-oss-agg
extraPodSpec:
affinity:
podAntiAffinity:
......@@ -50,7 +49,6 @@ spec:
replicas: 1
TrtllmWorker:
componentType: main
dynamoNamespace: gpt-oss-agg
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
......@@ -12,7 +12,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: llama3-70b-agg
volumeMounts:
- name: model-cache
mountPoint: /opt/models
......@@ -26,7 +25,6 @@ spec:
replicas: 1
VllmPrefillWorker:
componentType: worker
dynamoNamespace: llama3-70b-agg
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
......@@ -12,7 +12,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: llama3-70b-disagg-mn
volumeMounts:
- name: model-cache
mountPoint: /opt/models
......@@ -27,7 +26,6 @@ spec:
VllmPrefillWorker:
componentType: worker
subComponentType: prefill
dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......@@ -59,7 +57,6 @@ spec:
VllmDecodeWorker:
componentType: worker
subComponentType: decode
dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
......@@ -12,7 +12,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: llama3-70b-disagg-sn
volumeMounts:
- name: model-cache
mountPoint: /opt/models
......@@ -27,7 +26,6 @@ spec:
VllmPrefillWorker:
componentType: worker
subComponentType: prefill
dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......@@ -71,7 +69,6 @@ spec:
VllmDecodeWorker:
componentType: worker
subComponentType: decode
dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
......@@ -41,7 +41,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: qwen3-235b-a22b-agg
replicas: 1
extraPodSpec:
affinity:
......@@ -63,7 +62,6 @@ spec:
- -c
TrtllmWorker:
componentType: main
dynamoNamespace: qwen3-235b-a22b-agg
envFromSecret: hf-token-secret
sharedMemory:
size: 256Gi
......
......@@ -70,7 +70,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: qwen3-235b-a22b-disagg
replicas: 1
extraPodSpec:
affinity:
......@@ -93,7 +92,6 @@ spec:
TRTLLMPrefillWorker:
componentType: worker
subComponentType: prefill
dynamoNamespace: qwen3-235b-a22b-disagg
envFromSecret: hf-token-secret
replicas: 6
resources:
......@@ -145,7 +143,6 @@ spec:
TRTLLMDecodeWorker:
componentType: worker
subComponentType: decode
dynamoNamespace: qwen3-235b-a22b-disagg
envFromSecret: hf-token-secret
replicas: 1
resources:
......
......@@ -44,7 +44,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: qwen3-32b-fp8-agg
extraPodSpec:
affinity:
podAntiAffinity:
......@@ -66,7 +65,6 @@ spec:
replicas: 1
TrtllmWorker:
componentType: main
dynamoNamespace: qwen3-32b-fp8-agg
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
......@@ -201,7 +201,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: qwen3-32b-fp8-disagg
extraPodSpec:
affinity:
podAntiAffinity:
......@@ -224,7 +223,6 @@ spec:
TrtllmPrefillWorker:
componentType: worker
subComponentType: prefill
dynamoNamespace: qwen3-32b-fp8-disagg
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......@@ -285,7 +283,6 @@ spec:
TrtllmDecodeWorker:
componentType: worker
subComponentType: decode
dynamoNamespace: qwen3-32b-fp8-disagg
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
......@@ -13,7 +13,6 @@ spec:
services:
Frontend:
componentType: frontend
dynamoNamespace: agg-8xtp2
envs:
- name: HF_HOME
value: /home/dynamo/.cache/huggingface
......@@ -36,7 +35,6 @@ spec:
subComponentType: null
VllmDecodeWorker:
componentType: worker
dynamoNamespace: agg-8xtp2
envFromSecret: hf-token-secret
volumeMounts:
- name: model-cache
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment