Unverified commit a98406d4, authored by Rohan Varma and committed by GitHub
Browse files

fix: Update TRT-LLM Wide-EP Disagg GB200 Recipe to be compatible with TRT-LLM Version (#5383)

parent 9ca2923d
@@ -20,10 +20,9 @@ metadata:
   name: prefill-config
 data:
   prefill_config.yaml: |
-    build_config:
-      max_batch_size: 4
-      max_num_tokens: 4608
-      max_seq_len: 1227
+    max_batch_size: 4
+    max_num_tokens: 4608
+    max_seq_len: 1227
     tensor_parallel_size: 4
     moe_expert_parallel_size: 4
     enable_attention_dp: true
@@ -52,10 +51,9 @@ data:
     moe_expert_parallel_size: 32
     enable_attention_dp: true
     pipeline_parallel_size: 1
-    build_config:
-      max_batch_size: 32
-      max_num_tokens: 32
-      max_seq_len: 2251
+    max_batch_size: 32
+    max_num_tokens: 32
+    max_seq_len: 2251
     cuda_graph_config:
       enable_padding: true
       batch_sizes:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment