Make the FlashInfer workspace buffer larger (32 MiB -> 128 MiB)

e94e60d6 · Lianmin Zheng · d2f8bfb2 · e94e60d6
Commit e94e60d6 authored Jun 21, 2024 by Lianmin Zheng
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

python/sglang/srt/managers/controller/model_runner.py +1 -1

No files found.
--- a/python/sglang/srt/managers/controller/model_runner.py
+++ b/python/sglang/srt/managers/controller/model_runner.py
@@ -360,7 +360,7 @@ class ModelRunner:
                use_tensor_cores = False

            workspace_buffer = torch.empty(
-                32 * 1024 * 1024, dtype=torch.int8, device="cuda"
+                128 * 1024 * 1024, dtype=torch.int8, device="cuda"
            )
            self.flashinfer_prefill_wrapper = BatchPrefillWithPagedKVCacheWrapper(
                workspace_buffer, "NHD"