"tests/vscode:/vscode.git/clone" did not exist on "1282bd812ea4e1511378bad5b918d609280d2b89"
Unverified commit 4741239d, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix Long Context OOM Issue (#25290)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent c625f904
@@ -481,7 +481,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
 # which would result in up-projected context being
 # 2*(192*128)*(64*1024) = 3gb
 # (assuming 192 QK head dim, 128 heads, and fp16)
-128 * 1024)
+64 * 1024)
 assert self.chunked_prefill_workspace_size >= \
 scheduler_config.max_num_seqs * cache_config.block_size
 if self.dcp_world_size > 1:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment