"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "eeb94dab979bda01dae074b8c804411086f17968"
Unverified commit f4e20810, authored by Indrajit Bhosale and committed by GitHub
Browse files

fix: Update free_gpu_memory_fraction for llama4 Maverick to avoid OOM in multinode setup (#7090)

parent 48eb52e7
...@@ -15,14 +15,14 @@ ...@@ -15,14 +15,14 @@
tensor_parallel_size: 8 tensor_parallel_size: 8
moe_expert_parallel_size: 1 moe_expert_parallel_size: 1
enable_attention_dp: false enable_attention_dp: false
max_num_tokens: 8192 max_num_tokens: 4096
max_batch_size: 16 max_batch_size: 8
trust_remote_code: true trust_remote_code: true
backend: pytorch backend: pytorch
enable_chunked_prefill: true enable_chunked_prefill: true
disable_overlap_scheduler: false disable_overlap_scheduler: false
kv_cache_config: kv_cache_config:
free_gpu_memory_fraction: 0.30 free_gpu_memory_fraction: 0.20
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
tensor_parallel_size: 8 tensor_parallel_size: 8
moe_expert_parallel_size: 1 moe_expert_parallel_size: 1
enable_attention_dp: false enable_attention_dp: false
max_num_tokens: 8192 max_num_tokens: 4096
max_batch_size: 16 max_batch_size: 8
trust_remote_code: true trust_remote_code: true
backend: pytorch backend: pytorch
enable_chunked_prefill: true enable_chunked_prefill: true
...@@ -24,7 +24,7 @@ enable_chunked_prefill: true ...@@ -24,7 +24,7 @@ enable_chunked_prefill: true
disable_overlap_scheduler: true disable_overlap_scheduler: true
kv_cache_config: kv_cache_config:
free_gpu_memory_fraction: 0.30 free_gpu_memory_fraction: 0.20
enable_block_reuse: false enable_block_reuse: false
cache_transceiver_config: cache_transceiver_config:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment