Commit e94e60d6 authored by Lianmin Zheng
Browse files

make flashinfer workspace larger

parent d2f8bfb2
......@@ -360,7 +360,7 @@ class ModelRunner:
use_tensor_cores = False
workspace_buffer = torch.empty(
-32 * 1024 * 1024, dtype=torch.int8, device="cuda"
+128 * 1024 * 1024, dtype=torch.int8, device="cuda"
)
self.flashinfer_prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
workspace_buffer, "NHD"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment