Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3e4c7da2
Unverified
Commit
3e4c7da2
authored
Oct 17, 2025
by
Mick
Committed by
GitHub
Oct 17, 2025
Browse files
ci: reduce and refactor vlm ut and combine test files (#11062)
parent
d88ac9bc
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
81 additions
and
551 deletions
+81
-551
python/sglang/bench_one_batch_server.py
python/sglang/bench_one_batch_server.py
+2
-4
python/sglang/srt/models/mllama.py
python/sglang/srt/models/mllama.py
+1
-1
test/srt/run_suite.py
test/srt/run_suite.py
+1
-2
test/srt/test_vision_openai_server_a.py
test/srt/test_vision_openai_server_a.py
+57
-263
test/srt/test_vision_openai_server_b.py
test/srt/test_vision_openai_server_b.py
+0
-275
test/srt/test_vision_openai_server_common.py
test/srt/test_vision_openai_server_common.py
+20
-6
No files found.
python/sglang/bench_one_batch_server.py
View file @
3e4c7da2
...
@@ -137,12 +137,10 @@ def generate_markdown_report(trace_dir, results: List["BenchmarkResult"]) -> str
...
@@ -137,12 +137,10 @@ def generate_markdown_report(trace_dir, results: List["BenchmarkResult"]) -> str
# all results should share the same isl & osl
# all results should share the same isl & osl
for
result
in
results
:
for
result
in
results
:
base_url
=
os
.
getenv
(
base_url
=
os
.
getenv
(
"TRACE_BASE_URL"
,
""
).
rstrip
(
"/"
)
"TRACE_BASE_URL"
,
"https://github.com/sgl-project/ci-data/traces"
).
rstrip
(
"/"
)
relay_base
=
os
.
getenv
(
relay_base
=
os
.
getenv
(
"PERFETTO_RELAY_URL"
,
"PERFETTO_RELAY_URL"
,
"
https://docs.sglang.ai/ci-data/pages/perfetto_relay.html
"
,
""
,
).
rstrip
(
"/"
)
).
rstrip
(
"/"
)
summary
+=
result
.
to_markdown_row
(
trace_dir
,
base_url
,
relay_base
)
summary
+=
result
.
to_markdown_row
(
trace_dir
,
base_url
,
relay_base
)
...
...
python/sglang/srt/models/mllama.py
View file @
3e4c7da2
...
@@ -901,7 +901,7 @@ class MllamaForConditionalGeneration(nn.Module):
...
@@ -901,7 +901,7 @@ class MllamaForConditionalGeneration(nn.Module):
img
=
pixel_values
[
0
,
j
]
img
=
pixel_values
[
0
,
j
]
num_tiles
=
img
.
shape
[
0
]
num_tiles
=
img
.
shape
[
0
]
batched_images
[
i
,
j
,
:
num_tiles
]
=
img
batched_images
[
i
,
j
,
:
num_tiles
]
=
img
batched_ar_ids
[
i
,
j
]
=
mm_input
.
mm_items
[
0
].
aspect_ratio_id
[
0
,
j
]
batched_ar_ids
[
i
,
j
]
=
mm_input
.
mm_items
[
0
].
aspect_ratio_id
s
[
0
,
j
]
batched_ar_mask
[
i
,
j
,
:
num_tiles
]
=
mm_input
.
mm_items
[
batched_ar_mask
[
i
,
j
,
:
num_tiles
]
=
mm_input
.
mm_items
[
0
0
...
...
test/srt/run_suite.py
View file @
3e4c7da2
...
@@ -131,8 +131,7 @@ suites = {
...
@@ -131,8 +131,7 @@ suites = {
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_a.py"
,
608
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
],
],
"per-commit-2-gpu"
:
[
"per-commit-2-gpu"
:
[
...
...
test/srt/test_vision_openai_server_a.py
View file @
3e4c7da2
...
@@ -8,91 +8,34 @@ import unittest
...
@@ -8,91 +8,34 @@ import unittest
from
test_vision_openai_server_common
import
*
from
test_vision_openai_server_common
import
*
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
popen_launch_server
,
)
class
TestLlavaServer
(
ImageOpenAITestMixin
):
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
class
TestLlava
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
cls
.
base_url
+=
"/v1"
class
TestQwen25VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
class
TestQwen2VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
@
classmethod
extra_args
=
[
def
setUpClass
(
cls
):
"--cuda-graph-max-bs=4"
,
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
]
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen3VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
class
TestQwen3VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
model
=
"Qwen/Qwen3-VL-30B-A3B-Instruct"
def
setUpClass
(
cls
):
extra_args
=
[
"--cuda-graph-max-bs=4"
]
cls
.
model
=
"Qwen/Qwen3-VL-30B-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.80"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen2_5_VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
class
TestQwen3OmniServer
(
OmniOpenAITestMixin
):
@
classmethod
model
=
"Qwen/Qwen3-Omni-30B-A3B-Instruct"
def
setUpClass
(
cls
):
extra_args
=
[
# workaround to fit into H100
cls
.
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
"--mem-fraction-static=0.90"
,
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
"--disable-cuda-graph"
,
cls
.
api_key
=
"sk-123456"
"--disable-fast-image-processor"
,
cls
.
process
=
popen_launch_server
(
"--grammar-backend=none"
,
cls
.
model
,
]
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestVL
M
ContextLength
Issue
(
CustomTestCase
):
class
Test
Qwen2
VLContextLength
Server
(
CustomTestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
...
@@ -106,7 +49,6 @@ class TestVLMContextLengthIssue(CustomTestCase):
...
@@ -106,7 +49,6 @@ class TestVLMContextLengthIssue(CustomTestCase):
other_args
=
[
other_args
=
[
"--context-length"
,
"--context-length"
,
"300"
,
"300"
,
"--mem-fraction-static=0.75"
,
"--cuda-graph-max-bs"
,
"--cuda-graph-max-bs"
,
"4"
,
"4"
,
],
],
...
@@ -149,208 +91,60 @@ class TestVLMContextLengthIssue(CustomTestCase):
...
@@ -149,208 +91,60 @@ class TestVLMContextLengthIssue(CustomTestCase):
)
)
# Note(Xinyuan): mllama is not stable for now, skip for CI
# flaky
# class TestMllamaServer(TestOpenAIVisionServer):
# class TestMllamaServer(ImageOpenAITestMixin):
# @classmethod
# model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# api_key=cls.api_key,
# )
# cls.base_url += "/v1"
class
TestMinicpmvServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-V-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMinicpmv4Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-V-4"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestInternVL25Server
(
ImageOpenAITestMixin
):
model
=
"OpenGVLab/InternVL2_5-2B"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestInternVL2_5Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"OpenGVLab/InternVL2_5-2B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMiniCPMV4Server
(
ImageOpenAITestMixin
):
model
=
"openbmb/MiniCPM-V-4"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestMinicpmo2_6Server
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-o-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.65"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMiniCPMo26Server
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
model
=
"openbmb/MiniCPM-o-2_6"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestMimoVLServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"XiaomiMiMo/MiMo-VL-7B-RL"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.6"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestGemma3itServer
(
ImageOpenAITestMixin
):
model
=
"google/gemma-3-4b-it"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestVILAServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
revision
=
"6bde1de5964b40e61c802b375fff419edc867506"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--trust-remote-code"
,
"--context-length=65536"
,
f
"--revision=
{
cls
.
revision
}
"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestKimiVLServer
(
ImageOpenAITestMixin
):
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
extra_args
=
[
"--context-length=8192"
,
"--dtype=bfloat16"
,
]
class
TestPhi4MMServer
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
def
test_video_images_chat_completion
(
self
):
@
classmethod
# model context length exceeded
def
setUpClass
(
cls
):
pass
# Manually download LoRA adapter_config.json as it's not downloaded by the model loader by default.
from
huggingface_hub
import
constants
,
snapshot_download
snapshot_download
(
"microsoft/Phi-4-multimodal-instruct"
,
allow_patterns
=
[
"**/adapter_config.json"
],
)
cls
.
model
=
"microsoft/Phi-4-multimodal-instruct"
class
TestGLM41VServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
model
=
"zai-org/GLM-4.1V-9B-Thinking"
cls
.
api_key
=
"sk-123456"
extra_args
=
[
"--reasoning-parser=glm45"
,
]
revision
=
"33e62acdd07cd7d6635badd529aa0a3467bb9c6a"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--disable-radix-cache"
,
"--max-loras-per-batch"
,
"2"
,
"--revision"
,
revision
,
"--lora-paths"
,
f
"vision=
{
constants
.
HF_HUB_CACHE
}
/models--microsoft--Phi-4-multimodal-instruct/snapshots/
{
revision
}
/vision-lora"
,
f
"speech=
{
constants
.
HF_HUB_CACHE
}
/models--microsoft--Phi-4-multimodal-instruct/snapshots/
{
revision
}
/speech-lora"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
def
get_vision_request_kwargs
(
self
):
class
TestQwen2AudioServer
(
AudioOpenAITestMixin
):
return
{
model
=
"Qwen/Qwen2-Audio-7B-Instruct"
"extra_body"
:
{
"lora_path"
:
"vision"
,
"top_k"
:
1
,
"top_p"
:
1.0
,
}
}
def
get_audio_request_kwargs
(
self
):
return
{
"extra_body"
:
{
"lora_path"
:
"speech"
,
"top_k"
:
1
,
"top_p"
:
1.0
,
}
}
# This _test_audio_ambient_completion test is way too complicated to pass for a small LLM
def
test_audio_ambient_completion
(
self
):
pass
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
test/srt/test_vision_openai_server_b.py
deleted
100644 → 0
View file @
d88ac9bc
import
unittest
from
test_vision_openai_server_common
import
*
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
popen_launch_server
,
)
class
TestPixtralServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"mistral-community/pixtral-12b"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMistral3_1Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.75"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestDeepseekVL2Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-vl2-small"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestJanusProServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/Janus-Pro-7B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_images_chat_completion
(
self
):
pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# "--mm-attention-backend",
# "fa3",
# "--cuda-graph-max-bs",
# "4",
# ],
# )
# cls.base_url += "/v1"
class
TestGemma3itServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"google/gemma-3-4b-it"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--enable-multimodal"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestGemma3nServer
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"google/gemma-3n-E4B-it"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
# This _test_audio_ambient_completion test is way too complicated to pass for a small LLM
def
test_audio_ambient_completion
(
self
):
pass
def
_test_mixed_image_audio_chat_completion
(
self
):
self
.
_test_mixed_image_audio_chat_completion
()
class
TestQwen2AudioServer
(
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-Audio-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
],
)
cls
.
base_url
+=
"/v1"
# Temporarily skip Kimi-VL for CI test due to issue in transformers=4.57.0
# class TestKimiVLServer(ImageOpenAITestMixin):
# @classmethod
# def setUpClass(cls):
# cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--trust-remote-code",
# "--context-length",
# "4096",
# "--dtype",
# "bfloat16",
# "--cuda-graph-max-bs",
# "4",
# ],
# )
# cls.base_url += "/v1"
# def test_video_images_chat_completion(self):
# pass
class
TestGLM41VServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"zai-org/GLM-4.1V-9B-Thinking"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.68"
,
"--cuda-graph-max-bs"
,
"4"
,
"--reasoning-parser"
,
"glm45"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen3OmniServer
(
OmniOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen3-Omni-30B-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
# workaround to fit into H100
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.90"
,
"--disable-cuda-graph"
,
"--disable-fast-image-processor"
,
"--grammar-backend"
,
"none"
,
],
)
cls
.
base_url
+=
"/v1"
if
__name__
==
"__main__"
:
del
(
TestOpenAIMLLMServerBase
,
ImageOpenAITestMixin
,
VideoOpenAITestMixin
,
AudioOpenAITestMixin
,
OmniOpenAITestMixin
,
)
unittest
.
main
()
test/srt/test_vision_openai_server_common.py
View file @
3e4c7da2
...
@@ -9,7 +9,12 @@ import requests
...
@@ -9,7 +9,12 @@ import requests
from
PIL
import
Image
from
PIL
import
Image
from
sglang.srt.utils
import
kill_process_tree
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.test_utils
import
DEFAULT_URL_FOR_TEST
,
CustomTestCase
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
)
# image
# image
IMAGE_MAN_IRONING_URL
=
"https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.png"
IMAGE_MAN_IRONING_URL
=
"https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.png"
...
@@ -24,12 +29,21 @@ AUDIO_BIRD_SONG_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-fi
...
@@ -24,12 +29,21 @@ AUDIO_BIRD_SONG_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-fi
class
TestOpenAIMLLMServerBase
(
CustomTestCase
):
class
TestOpenAIMLLMServerBase
(
CustomTestCase
):
model
:
str
extra_args
:
list
=
[]
fixed_args
:
list
=
[
"--trust-remote-code"
,
"--enable-multimodal"
]
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
""
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
None
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
cls
.
extra_args
+
cls
.
fixed_args
,
)
cls
.
base_url
+=
"/v1"
cls
.
base_url
+=
"/v1"
@
classmethod
@
classmethod
...
@@ -421,7 +435,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
...
@@ -421,7 +435,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
or
"device"
in
video_response
or
"device"
in
video_response
or
"microphone"
in
video_response
or
"microphone"
in
video_response
),
f
"""
),
f
"""
====================== video_response =====================
====================== video_
images
response =====================
{
video_response
}
{
video_response
}
===========================================================
===========================================================
should contain 'iPod' or 'device' or 'microphone'
should contain 'iPod' or 'device' or 'microphone'
...
@@ -435,7 +449,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
...
@@ -435,7 +449,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
or
"Steve"
in
video_response
or
"Steve"
in
video_response
or
"hand"
in
video_response
or
"hand"
in
video_response
),
f
"""
),
f
"""
====================== video_response =====================
====================== video_
images
response =====================
{
video_response
}
{
video_response
}
===========================================================
===========================================================
should contain 'man' or 'person' or 'individual' or 'speaker' or 'presenter' or 'Steve' or 'hand'
should contain 'man' or 'person' or 'individual' or 'speaker' or 'presenter' or 'Steve' or 'hand'
...
@@ -446,7 +460,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
...
@@ -446,7 +460,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
or
"display"
in
video_response
or
"display"
in
video_response
or
"hold"
in
video_response
or
"hold"
in
video_response
),
f
"""
),
f
"""
====================== video_response =====================
====================== video_
images
response =====================
{
video_response
}
{
video_response
}
===========================================================
===========================================================
should contain 'present' or 'examine' or 'display' or 'hold'
should contain 'present' or 'examine' or 'display' or 'hold'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment