"examples/vscode:/vscode.git/clone" did not exist on "23de4e86aed70d1bf1d62caddac586285213e6bd"
Unverified commit c8770464, authored by hhzhang16 and committed by GitHub
Browse files

feat: normalize dynamo namespace computation (#5231)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent abd4b5d9
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-agg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -19,7 +18,6 @@ spec: ...@@ -19,7 +18,6 @@ spec:
value: kv value: kv
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router
componentType: worker componentType: worker
replicas: 4 replicas: 4
resources: resources:
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v1-disagg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -18,7 +17,6 @@ spec: ...@@ -18,7 +17,6 @@ spec:
- name: DYN_ROUTER_MODE - name: DYN_ROUTER_MODE
value: kv value: kv
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
...@@ -46,7 +44,6 @@ spec: ...@@ -46,7 +44,6 @@ spec:
args: args:
- python3 -m dynamo.vllm --model meta-llama/Llama-3.1-70B-Instruct -tp 8 - python3 -m dynamo.vllm --model meta-llama/Llama-3.1-70B-Instruct -tp 8
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
......
...@@ -29,7 +29,6 @@ spec: ...@@ -29,7 +29,6 @@ spec:
periodSeconds: 10 periodSeconds: 10
timeoutSeconds: 2 timeoutSeconds: 2
failureThreshold: 3 failureThreshold: 3
dynamoNamespace: hello-world
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
resources: resources:
...@@ -68,7 +67,6 @@ spec: ...@@ -68,7 +67,6 @@ spec:
periodSeconds: 60 periodSeconds: 60
timeoutSeconds: 30 timeoutSeconds: 30
failureThreshold: 10 failureThreshold: 10
dynamoNamespace: hello-world
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -7,7 +7,6 @@ metadata: ...@@ -7,7 +7,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: sglang-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -15,7 +14,6 @@ spec: ...@@ -15,7 +14,6 @@ spec:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
decode: decode:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
replicas: 1 replicas: 1
...@@ -38,7 +36,6 @@ spec: ...@@ -38,7 +36,6 @@ spec:
exec python3 -m dynamo.sglang --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --page-size 16 --tp 1 --trust-remote-code --skip-tokenizer-init --disaggregation-mode decode --disaggregation-transfer-backend nixl --disaggregation-bootstrap-port --disaggregation-bootstrap-port "12345" --host "0.0.0.0" exec python3 -m dynamo.sglang --model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B --page-size 16 --tp 1 --trust-remote-code --skip-tokenizer-init --disaggregation-mode decode --disaggregation-transfer-backend nixl --disaggregation-bootstrap-port --disaggregation-bootstrap-port "12345" --host "0.0.0.0"
prefill: prefill:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
replicas: 1 replicas: 1
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -39,7 +37,6 @@ spec: ...@@ -39,7 +37,6 @@ spec:
/sbin/ldconfig /sbin/ldconfig
python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
......
...@@ -9,7 +9,6 @@ spec: ...@@ -9,7 +9,6 @@ spec:
backendFramework: vllm backendFramework: vllm
services: services:
Frontend: Frontend:
dynamoNamespace: agg-llava
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -17,7 +16,6 @@ spec: ...@@ -17,7 +16,6 @@ spec:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
EncodeWorker: EncodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: agg-llava
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
...@@ -34,7 +32,6 @@ spec: ...@@ -34,7 +32,6 @@ spec:
- python3 components/encode_worker.py --model llava-hf/llava-1.5-7b-hf - python3 components/encode_worker.py --model llava-hf/llava-1.5-7b-hf
VLMWorker: VLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: agg-llava
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
...@@ -51,7 +48,6 @@ spec: ...@@ -51,7 +48,6 @@ spec:
- python3 components/worker.py --model llava-hf/llava-1.5-7b-hf --worker-type prefill - python3 components/worker.py --model llava-hf/llava-1.5-7b-hf --worker-type prefill
Processor: Processor:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: agg-llava
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -9,7 +9,6 @@ spec: ...@@ -9,7 +9,6 @@ spec:
backendFramework: vllm backendFramework: vllm
services: services:
Frontend: Frontend:
dynamoNamespace: agg-qwen
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -17,7 +16,6 @@ spec: ...@@ -17,7 +16,6 @@ spec:
image: my-registry/vllm-runtime:my-tag image: my-registry/vllm-runtime:my-tag
EncodeWorker: EncodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: agg-qwen
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
...@@ -34,7 +32,6 @@ spec: ...@@ -34,7 +32,6 @@ spec:
- python3 components/encode_worker.py --model Qwen/Qwen2.5-VL-7B-Instruct - python3 components/encode_worker.py --model Qwen/Qwen2.5-VL-7B-Instruct
VLMWorker: VLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: agg-qwen
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
...@@ -51,7 +48,6 @@ spec: ...@@ -51,7 +48,6 @@ spec:
- python3 components/worker.py --model Qwen/Qwen2.5-VL-7B-Instruct --worker-type prefill - python3 components/worker.py --model Qwen/Qwen2.5-VL-7B-Instruct --worker-type prefill
Processor: Processor:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: agg-qwen
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -14,7 +14,6 @@ spec: ...@@ -14,7 +14,6 @@ spec:
create: false create: false
services: services:
Frontend: Frontend:
dynamoNamespace: sgl-dsr1-16gpu
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
volumeMounts: volumeMounts:
...@@ -24,7 +23,6 @@ spec: ...@@ -24,7 +23,6 @@ spec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
decode: decode:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
replicas: 1 replicas: 1
...@@ -70,7 +68,6 @@ spec: ...@@ -70,7 +68,6 @@ spec:
- 0.0.0.0 - 0.0.0.0
- --prefill-round-robin-balance - --prefill-round-robin-balance
prefill: prefill:
dynamoNamespace: sgl-dsr1-16gpu
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
replicas: 1 replicas: 1
......
...@@ -14,7 +14,6 @@ spec: ...@@ -14,7 +14,6 @@ spec:
create: false create: false
services: services:
Frontend: Frontend:
dynamoNamespace: sgl-dsr1-8gpu
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
volumeMounts: volumeMounts:
...@@ -24,7 +23,6 @@ spec: ...@@ -24,7 +23,6 @@ spec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
decode: decode:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
replicas: 1 replicas: 1
...@@ -67,7 +65,6 @@ spec: ...@@ -67,7 +65,6 @@ spec:
- 0.0.0.0 - 0.0.0.0
- --prefill-round-robin-balance - --prefill-round-robin-balance
prefill: prefill:
dynamoNamespace: sgl-dsr1-8gpu
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
replicas: 1 replicas: 1
......
...@@ -122,7 +122,6 @@ spec: ...@@ -122,7 +122,6 @@ spec:
backendFramework: trtllm backendFramework: trtllm
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-disagg-multinode
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -137,7 +136,6 @@ spec: ...@@ -137,7 +136,6 @@ spec:
- /bin/sh - /bin/sh
- -c - -c
prefill: prefill:
dynamoNamespace: trtllm-disagg-multinode
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
replicas: 1 replicas: 1
...@@ -195,7 +193,6 @@ spec: ...@@ -195,7 +193,6 @@ spec:
configMap: configMap:
name: prefill-config name: prefill-config
decode: decode:
dynamoNamespace: trtllm-disagg-multinode
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
replicas: 1 replicas: 1
......
...@@ -12,7 +12,6 @@ spec: ...@@ -12,7 +12,6 @@ spec:
create: false create: false
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-dsr1
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -26,7 +25,6 @@ spec: ...@@ -26,7 +25,6 @@ spec:
failureThreshold: 60 failureThreshold: 60
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
decode: decode:
dynamoNamespace: vllm-dsr1
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
replicas: 1 replicas: 1
...@@ -92,7 +90,6 @@ spec: ...@@ -92,7 +90,6 @@ spec:
--max-num-seqs 512 \ --max-num-seqs 512 \
--compilation_config '{"pass_config":{"enable_fusion":true,"enable_attn_fusion":true,"enable_noop":true},"custom_ops":["+rms_norm"],"cudagraph_mode":"FULL_DECODE_ONLY"}' --compilation_config '{"pass_config":{"enable_fusion":true,"enable_attn_fusion":true,"enable_noop":true},"custom_ops":["+rms_norm"],"cudagraph_mode":"FULL_DECODE_ONLY"}'
prefill: prefill:
dynamoNamespace: vllm-dsr1
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
replicas: 1 replicas: 1
......
...@@ -28,7 +28,6 @@ spec: ...@@ -28,7 +28,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: gpt-oss-agg
extraPodSpec: extraPodSpec:
affinity: affinity:
podAntiAffinity: podAntiAffinity:
...@@ -50,7 +49,6 @@ spec: ...@@ -50,7 +49,6 @@ spec:
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: main componentType: main
dynamoNamespace: gpt-oss-agg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
...@@ -12,7 +12,6 @@ spec: ...@@ -12,7 +12,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: llama3-70b-agg
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
mountPoint: /opt/models mountPoint: /opt/models
...@@ -26,7 +25,6 @@ spec: ...@@ -26,7 +25,6 @@ spec:
replicas: 1 replicas: 1
VllmPrefillWorker: VllmPrefillWorker:
componentType: worker componentType: worker
dynamoNamespace: llama3-70b-agg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
...@@ -12,7 +12,6 @@ spec: ...@@ -12,7 +12,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: llama3-70b-disagg-mn
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
mountPoint: /opt/models mountPoint: /opt/models
...@@ -27,7 +26,6 @@ spec: ...@@ -27,7 +26,6 @@ spec:
VllmPrefillWorker: VllmPrefillWorker:
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
...@@ -59,7 +57,6 @@ spec: ...@@ -59,7 +57,6 @@ spec:
VllmDecodeWorker: VllmDecodeWorker:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
dynamoNamespace: llama3-70b-disagg-mn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
...@@ -12,7 +12,6 @@ spec: ...@@ -12,7 +12,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: llama3-70b-disagg-sn
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
mountPoint: /opt/models mountPoint: /opt/models
...@@ -27,7 +26,6 @@ spec: ...@@ -27,7 +26,6 @@ spec:
VllmPrefillWorker: VllmPrefillWorker:
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
...@@ -71,7 +69,6 @@ spec: ...@@ -71,7 +69,6 @@ spec:
VllmDecodeWorker: VllmDecodeWorker:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
dynamoNamespace: llama3-70b-disagg-sn
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
...@@ -41,7 +41,6 @@ spec: ...@@ -41,7 +41,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: qwen3-235b-a22b-agg
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
affinity: affinity:
...@@ -63,7 +62,6 @@ spec: ...@@ -63,7 +62,6 @@ spec:
- -c - -c
TrtllmWorker: TrtllmWorker:
componentType: main componentType: main
dynamoNamespace: qwen3-235b-a22b-agg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
sharedMemory: sharedMemory:
size: 256Gi size: 256Gi
......
...@@ -70,7 +70,6 @@ spec: ...@@ -70,7 +70,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: qwen3-235b-a22b-disagg
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
affinity: affinity:
...@@ -93,7 +92,6 @@ spec: ...@@ -93,7 +92,6 @@ spec:
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
dynamoNamespace: qwen3-235b-a22b-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
replicas: 6 replicas: 6
resources: resources:
...@@ -145,7 +143,6 @@ spec: ...@@ -145,7 +143,6 @@ spec:
TRTLLMDecodeWorker: TRTLLMDecodeWorker:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
dynamoNamespace: qwen3-235b-a22b-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
replicas: 1 replicas: 1
resources: resources:
......
...@@ -44,7 +44,6 @@ spec: ...@@ -44,7 +44,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: qwen3-32b-fp8-agg
extraPodSpec: extraPodSpec:
affinity: affinity:
podAntiAffinity: podAntiAffinity:
...@@ -66,7 +65,6 @@ spec: ...@@ -66,7 +65,6 @@ spec:
replicas: 1 replicas: 1
TrtllmWorker: TrtllmWorker:
componentType: main componentType: main
dynamoNamespace: qwen3-32b-fp8-agg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
...@@ -201,7 +201,6 @@ spec: ...@@ -201,7 +201,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: qwen3-32b-fp8-disagg
extraPodSpec: extraPodSpec:
affinity: affinity:
podAntiAffinity: podAntiAffinity:
...@@ -224,7 +223,6 @@ spec: ...@@ -224,7 +223,6 @@ spec:
TrtllmPrefillWorker: TrtllmPrefillWorker:
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
dynamoNamespace: qwen3-32b-fp8-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
...@@ -285,7 +283,6 @@ spec: ...@@ -285,7 +283,6 @@ spec:
TrtllmDecodeWorker: TrtllmDecodeWorker:
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
dynamoNamespace: qwen3-32b-fp8-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
...@@ -13,7 +13,6 @@ spec: ...@@ -13,7 +13,6 @@ spec:
services: services:
Frontend: Frontend:
componentType: frontend componentType: frontend
dynamoNamespace: agg-8xtp2
envs: envs:
- name: HF_HOME - name: HF_HOME
value: /home/dynamo/.cache/huggingface value: /home/dynamo/.cache/huggingface
...@@ -36,7 +35,6 @@ spec: ...@@ -36,7 +35,6 @@ spec:
subComponentType: null subComponentType: null
VllmDecodeWorker: VllmDecodeWorker:
componentType: worker componentType: worker
dynamoNamespace: agg-8xtp2
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
volumeMounts: volumeMounts:
- name: model-cache - name: model-cache
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment