Unverified commit 3008db9c, authored by fzyzcjy, committed by GitHub
Browse files

[PD] Allow customizing reserved tokens to avoid KV cache waste (#6002)

parent 357fb2db
...@@ -97,7 +97,9 @@ class DecodePreallocQueue: ...@@ -97,7 +97,9 @@ class DecodePreallocQueue:
self.tp_size = tp_size self.tp_size = tp_size
self.bootstrap_port = bootstrap_port self.bootstrap_port = bootstrap_port
self.num_reserved_decode_tokens = 512 self.num_reserved_decode_tokens = int(
os.environ.get("SGLANG_NUM_RESERVED_DECODE_TOKENS", "512")
)
# Queue for requests pending pre-allocation # Queue for requests pending pre-allocation
self.queue: List[DecodeRequest] = [] self.queue: List[DecodeRequest] = []
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment