Support LoRA in bench_serving oai interface (#11318)

92473e2e · Lifu Huang · GitHub · 6c0bb327 · 92473e2e
Unverified commit 92473e2e, authored Oct 08, 2025 by Lifu Huang; committed by GitHub on Oct 08, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 11 additions and 0 deletions

python/sglang/bench_serving.py (+11 -0)

No files found.
--- a/python/sglang/bench_serving.py
+++ b/python/sglang/bench_serving.py
@@ -209,6 +209,11 @@ async def async_request_openai_completions(
            **request_func_input.extra_request_body,
        }

+        # hack to accommodate different LoRA conventions between SGLang and vLLM.
+        if request_func_input.lora_name:
+            payload["model"] = request_func_input.lora_name
+            payload["lora_path"] = request_func_input.lora_name
+
        if request_func_input.image_data:
            payload.update({"image_data": request_func_input.image_data})

@@ -326,6 +331,12 @@ async def async_request_openai_chat_completions(
            "stream": not args.disable_stream,
            **request_func_input.extra_request_body,
        }
+
+        # hack to accommodate different LoRA conventions between SGLang and vLLM.
+        if request_func_input.lora_name:
+            payload["model"] = request_func_input.lora_name
+            payload["lora_path"] = request_func_input.lora_name
+
        headers = get_auth_headers()

        output = RequestFuncOutput.init_new(request_func_input)