"docs/basic_usage/native_api.ipynb" did not exist on "908dd7f9aae52a9c961c836d99e46ba6681fee42"
Unverified Commit 535c8386 authored by JieXin Liang's avatar JieXin Liang Committed by GitHub
Browse files

[fix] more mem for draft_extend cuda_graph (#6726)

parent 2163586e
......@@ -271,6 +271,9 @@ class ServerArgs:
mem_fraction + 48 * 1024 * (1 - mem_fraction) / gpu_mem,
(gpu_mem - reserve_mem) / gpu_mem,
)
else:
if self.speculative_algorithm is not None:
self.mem_fraction_static *= 0.95
# Set chunked prefill size, which depends on the gpu memory capacity
if self.chunked_prefill_size is None:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment