Unverified commit b7224eb5, authored by jthomson04, committed by GitHub
Browse files

fix: Fix Qwen3-8b-fp8 recipe (#7010)


Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
parent 734c0b8b
......@@ -12,8 +12,8 @@ data:
enable_attention_dp: false
enable_chunked_prefill: false
max_batch_size: 128
-max_num_tokens: 7800
-max_seq_len: 7800
+max_num_tokens: 7808
+max_seq_len: 7808
kv_cache_config:
enable_block_reuse: false
free_gpu_memory_fraction: 0.7
......@@ -166,8 +166,8 @@ data:
enable_attention_dp: false
enable_chunked_prefill: false
max_batch_size: 1
-max_num_tokens: 7800
-max_seq_len: 7800
+max_num_tokens: 7808
+max_seq_len: 7808
kv_cache_config:
enable_block_reuse: false
free_gpu_memory_fraction: 0.7
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment