Unverified commit 74d3db65, authored by Hongkuan Zhou, committed by GitHub
Browse files

fix: remove bash wrapper for vllm dsr1 recipe (#6035)


Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent 3a418254
...@@ -68,26 +68,35 @@ spec: ...@@ -68,26 +68,35 @@ spec:
- name: GLOO_SOCKET_IFNAME - name: GLOO_SOCKET_IFNAME
value: eth0 value: eth0
command: command:
- /bin/bash - python3
- -c - -m
- dynamo.vllm
args: args:
- | - --model
exec python3 -m dynamo.vllm \ - /model-cache/deepseek-r1
--model /model-cache/deepseek-r1 \ - --served-model-name
--served-model-name deepseek-ai/DeepSeek-R1 \ - deepseek-ai/DeepSeek-R1
--all2all-backend deepep_low_latency \ - --all2all-backend
--data-parallel-hybrid-lb \ - deepep_low_latency
--tensor-parallel-size 1 \ - --data-parallel-hybrid-lb
--data-parallel-size 16 \ - --tensor-parallel-size
--enable-expert-parallel \ - "1"
--max-model-len 16384 \ - --data-parallel-size
--enable-dbo \ - "16"
--dbo-decode-token-threshold 32 \ - --enable-expert-parallel
--async-scheduling \ - --max-model-len
--enable-eplb \ - "16384"
--eplb-config '{"window_size":"1000","step_interval":"3000","num_redundant_experts":"32","log_balancedness":"False"}' \ - --enable-dbo
--max-num-seqs 512 \ - --dbo-decode-token-threshold
--compilation_config '{"pass_config":{"fuse_norm_quant":true,"eliminate_noops":true},"cudagraph_mode":"FULL_DECODE_ONLY"}' - "32"
- --async-scheduling
- --enable-eplb
- --eplb-config
- '{"window_size":"1000","step_interval":"3000","num_redundant_experts":"32","log_balancedness":"False"}'
- --max-num-seqs
- "512"
- --compilation_config
- '{"pass_config":{"fuse_norm_quant":true,"eliminate_noops":true},"cudagraph_mode":"FULL_DECODE_ONLY"}'
prefill: prefill:
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
...@@ -127,23 +136,31 @@ spec: ...@@ -127,23 +136,31 @@ spec:
- name: GLOO_SOCKET_IFNAME - name: GLOO_SOCKET_IFNAME
value: eth0 value: eth0
command: command:
- /bin/bash - python3
- -c - -m
- dynamo.vllm
args: args:
- | - --model
exec python3 -m dynamo.vllm \ - /model-cache/deepseek-r1
--model /model-cache/deepseek-r1 \ - --is-prefill-worker
--is-prefill-worker \ - --served-model-name
--served-model-name deepseek-ai/DeepSeek-R1 \ - deepseek-ai/DeepSeek-R1
--all2all-backend deepep_high_throughput \ - --all2all-backend
--data-parallel-hybrid-lb \ - deepep_high_throughput
--tensor-parallel-size 1 \ - --data-parallel-hybrid-lb
--data-parallel-size 16 \ - --tensor-parallel-size
--enable-expert-parallel \ - "1"
--max-model-len 16384 \ - --data-parallel-size
--enable-dbo \ - "16"
--dbo-decode-token-threshold 32 \ - --enable-expert-parallel
--async-scheduling \ - --max-model-len
--enable-eplb \ - "16384"
--eplb-config '{"window_size":"1000","step_interval":"3000","num_redundant_experts":"32","log_balancedness":"False"}' \ - --enable-dbo
--max-num-seqs 512 - --dbo-decode-token-threshold
- "32"
- --async-scheduling
- --enable-eplb
- --eplb-config
- '{"window_size":"1000","step_interval":"3000","num_redundant_experts":"32","log_balancedness":"False"}'
- --max-num-seqs
- "512"
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment