Unverified Commit b43a131c authored by Yunzhou Liu, committed by GitHub
Browse files

docs: update Qwen3-235B-A22B-FP8 recipes (#5254)


Signed-off-by: Elnifio <elnifio0519@gmail.com>
parent 84b5e9b5
......@@ -13,10 +13,6 @@ data:
moe_tensor_parallel_size: 1
enable_attention_dp: false
enable_chunked_prefill: true
- build_config:
- max_batch_size: 128
- max_num_tokens: 8192
- max_seq_len: 8192
kv_cache_config:
enable_block_reuse: true
free_gpu_memory_fraction: 0.8
......@@ -91,6 +87,9 @@ spec:
python3 -m dynamo.trtllm \
--model-path "${MODEL_PATH}" \
--served-model-name "Qwen/Qwen3-235B-A22B-FP8" \
+ --max-batch-size 128 \
+ --max-num-tokens 8192 \
+ --max-seq-len 8192 \
--extra-engine-args "${ENGINE_ARGS}"
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0
workingDir: /workspace/components/backends/trtllm
......
......@@ -13,10 +13,6 @@ data:
moe_expert_parallel_size: 1
enable_attention_dp: false
enable_chunked_prefill: false
- build_config:
- max_batch_size: 2
- max_num_tokens: 8192
- max_seq_len: 8192
kv_cache_config:
enable_block_reuse: true
free_gpu_memory_fraction: 0.7
......@@ -42,10 +38,6 @@ data:
moe_tensor_parallel_size: 1
enable_attention_dp: false
enable_chunked_prefill: false
- build_config:
- max_batch_size: 512
- max_num_tokens: 1024
- max_seq_len: 8192
kv_cache_config:
enable_block_reuse: false
free_gpu_memory_fraction: 0.95
......@@ -127,9 +119,11 @@ spec:
python3 -m dynamo.trtllm \
--model-path "${MODEL_PATH}" \
--served-model-name "Qwen/Qwen3-235B-A22B-FP8" \
+ --max-batch-size 2 \
+ --max-num-tokens 8192 \
+ --max-seq-len 8192 \
  --extra-engine-args "${ENGINE_ARGS}" \
- --disaggregation-mode prefill \
- --disaggregation-strategy prefill_first
+ --disaggregation-mode prefill
volumeMounts:
- name: prefill-config
mountPath: /engine_configs
......@@ -180,9 +174,11 @@ spec:
python3 -m dynamo.trtllm \
--model-path "${MODEL_PATH}" \
--served-model-name "Qwen/Qwen3-235B-A22B-FP8" \
+ --max-batch-size 512 \
+ --max-num-tokens 1024 \
+ --max-seq-len 8192 \
  --extra-engine-args "${ENGINE_ARGS}" \
- --disaggregation-mode decode \
- --disaggregation-strategy prefill_first
+ --disaggregation-mode decode
volumeMounts:
- name: decode-config
mountPath: /engine_configs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment