Unverified commit cd8d4b9d authored by Qiaolin Yu, committed by GitHub

Fix lora bench (#6302)

parent f194e14f
@@ -170,6 +170,7 @@ async def benchmark(
         prompt_len=test_prompt_len,
         output_len=test_output_len,
         lora_name="dummy",  # the lora_name argument will not be used
+        image_data=None,
         extra_request_body=extra_request_body,
     )
     test_output = await request_func(request_func_input=test_input)
@@ -194,6 +195,7 @@ async def benchmark(
             prompt_len=prompt_len,
             output_len=output_len,
             lora_name="dummy",
+            image_data=None,
             extra_request_body=extra_request_body,
         )
         tasks.append(
...
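The `image_data=None` additions keep the LoRA benchmark in step with the shared request dataclass used by the serving benchmark harness. Below is a minimal sketch of the likely failure mode, assuming `RequestFuncInput` gained an `image_data` field with no default so that every call site must now pass it explicitly; the dataclass layout shown here is illustrative, not the actual bench_serving definition.

    from dataclasses import dataclass, field
    from typing import Optional

    # Illustrative stand-in for the benchmark's request dataclass; any field
    # other than image_data, lora_name, and extra_request_body is an assumption.
    @dataclass
    class RequestFuncInput:
        prompt: str
        api_url: str
        prompt_len: int
        output_len: int
        lora_name: str
        image_data: Optional[str]  # new field without a default
        extra_request_body: dict = field(default_factory=dict)

    # Before the fix, building the input without image_data would raise a
    # missing-argument TypeError; passing None keeps the benchmark text-only.
    test_input = RequestFuncInput(
        prompt="Hello",
        api_url="http://127.0.0.1:30000/generate",
        prompt_len=5,
        output_len=16,
        lora_name="dummy",  # the lora_name argument will not be used
        image_data=None,
        extra_request_body={},
    )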
@@ -170,9 +170,7 @@ class LoRAManager:
             dim=0,
             out=self.cuda_graph_batch_info.seg_indptr[1 : bs + 1],
         )
-        self.cuda_graph_batch_info.max_len = int(
-            torch.max(self.cuda_graph_batch_info.seg_lens[:bs])
-        )
+        self.cuda_graph_batch_info.max_len = 1
         for i, lora_path in enumerate(forward_batch.lora_paths):
             self.cuda_graph_batch_info.weight_indices[i] = (
...
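The LoRAManager change swaps a data-dependent reduction for a constant on the CUDA-graph path. A plausible reading: `int(torch.max(...))` copies a scalar from GPU to host, which is an implicit synchronization that CUDA graph capture does not tolerate, and since the captured graphs presumably cover decode batches where every segment contributes a single token, a constant 1 is a safe stand-in. The snippet below (requires a CUDA device) only illustrates the synchronization behaviour; it is not sglang code.

    import torch

    # Decode-style segment lengths: one token per sequence in the batch.
    seg_lens = torch.ones(8, dtype=torch.int32, device="cuda")

    # int() on a 0-dim CUDA tensor calls .item(), forcing a device-to-host copy
    # and a synchronization point -- undesirable while capturing a CUDA graph.
    max_len = int(torch.max(seg_lens))

    # During decode the maximum segment length is always 1, which is the
    # constant the fix hard-codes for the CUDA-graph batch info.
    assert max_len == 1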