Commit e94e60d6 authored by Lianmin Zheng
Browse files

make flashinfer workspace larger

parent d2f8bfb2
@@ -360,7 +360,7 @@ class ModelRunner:
 use_tensor_cores = False
 workspace_buffer = torch.empty(
-32 * 1024 * 1024, dtype=torch.int8, device="cuda"
+128 * 1024 * 1024, dtype=torch.int8, device="cuda"
 )
 self.flashinfer_prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
 workspace_buffer, "NHD"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment