Unverified commit 535c8386, authored by JieXin Liang and committed by GitHub
Browse files

[fix] more mem for draft_extend cuda_graph (#6726)

parent 2163586e
...@@ -271,6 +271,9 @@ class ServerArgs: ...@@ -271,6 +271,9 @@ class ServerArgs:
mem_fraction + 48 * 1024 * (1 - mem_fraction) / gpu_mem, mem_fraction + 48 * 1024 * (1 - mem_fraction) / gpu_mem,
(gpu_mem - reserve_mem) / gpu_mem, (gpu_mem - reserve_mem) / gpu_mem,
) )
else:
if self.speculative_algorithm is not None:
self.mem_fraction_static *= 0.95
# Set chunked prefill size, which depends on the gpu memory capacity # Set chunked prefill size, which depends on the gpu memory capacity
if self.chunked_prefill_size is None: if self.chunked_prefill_size is None:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment