Fix typo in cuda_graph_bs (#2813)

679c3bca · Lianmin Zheng · GitHub · 656aed58 · 679c3bca
Unverified Commit 679c3bca authored Jan 09, 2025 by Lianmin Zheng Committed by GitHub Jan 09, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 0 additions and 5 deletions

python/sglang/srt/model_executor/cuda_graph_runner.py python/sglang/srt/model_executor/cuda_graph_runner.py +0 -5

No files found.
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -131,11 +131,6 @@ class CudaGraphRunner:
            else:
                self.capture_bs = [1, 2, 4] + [i * 8 for i in range(1, 21)]
-        if model_runner.server_args.disable_cuda_graph_padding:
-            self.capture_bs = list(range(1, 33)) + [64, 128]
-        else:
-            self.capture_bs = [1, 2, 4] + [i * 8 for i in range(1, 21)]
        if max(self.capture_bs) > model_runner.req_to_token_pool.size:
            # In some cases (e.g., with a small GPU or --max-running-requests), the #max-running-requests
            # is very small. We add more values here to make sure we capture the maximum bs.