Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3e4c7da2
Unverified
Commit
3e4c7da2
authored
Oct 17, 2025
by
Mick
Committed by
GitHub
Oct 17, 2025
Browse files
ci: reduce and refactor vlm ut and combine test files (#11062)
parent
d88ac9bc
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
81 additions
and
551 deletions
+81
-551
python/sglang/bench_one_batch_server.py
python/sglang/bench_one_batch_server.py
+2
-4
python/sglang/srt/models/mllama.py
python/sglang/srt/models/mllama.py
+1
-1
test/srt/run_suite.py
test/srt/run_suite.py
+1
-2
test/srt/test_vision_openai_server_a.py
test/srt/test_vision_openai_server_a.py
+57
-263
test/srt/test_vision_openai_server_b.py
test/srt/test_vision_openai_server_b.py
+0
-275
test/srt/test_vision_openai_server_common.py
test/srt/test_vision_openai_server_common.py
+20
-6
No files found.
python/sglang/bench_one_batch_server.py
View file @
3e4c7da2
...
...
@@ -137,12 +137,10 @@ def generate_markdown_report(trace_dir, results: List["BenchmarkResult"]) -> str
# all results should share the same isl & osl
for
result
in
results
:
base_url
=
os
.
getenv
(
"TRACE_BASE_URL"
,
"https://github.com/sgl-project/ci-data/traces"
).
rstrip
(
"/"
)
base_url
=
os
.
getenv
(
"TRACE_BASE_URL"
,
""
).
rstrip
(
"/"
)
relay_base
=
os
.
getenv
(
"PERFETTO_RELAY_URL"
,
"
https://docs.sglang.ai/ci-data/pages/perfetto_relay.html
"
,
""
,
).
rstrip
(
"/"
)
summary
+=
result
.
to_markdown_row
(
trace_dir
,
base_url
,
relay_base
)
...
...
python/sglang/srt/models/mllama.py
View file @
3e4c7da2
...
...
@@ -901,7 +901,7 @@ class MllamaForConditionalGeneration(nn.Module):
img
=
pixel_values
[
0
,
j
]
num_tiles
=
img
.
shape
[
0
]
batched_images
[
i
,
j
,
:
num_tiles
]
=
img
batched_ar_ids
[
i
,
j
]
=
mm_input
.
mm_items
[
0
].
aspect_ratio_id
[
0
,
j
]
batched_ar_ids
[
i
,
j
]
=
mm_input
.
mm_items
[
0
].
aspect_ratio_id
s
[
0
,
j
]
batched_ar_mask
[
i
,
j
,
:
num_tiles
]
=
mm_input
.
mm_items
[
0
...
...
test/srt/run_suite.py
View file @
3e4c7da2
...
...
@@ -131,8 +131,7 @@ suites = {
TestFile
(
"test_triton_sliding_window.py"
,
250
),
TestFile
(
"test_utils_update_weights.py"
,
48
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vision_openai_server_a.py"
,
724
),
TestFile
(
"test_vision_openai_server_b.py"
,
446
),
TestFile
(
"test_vision_openai_server_a.py"
,
608
),
TestFile
(
"test_vlm_input_format.py"
,
300
),
],
"per-commit-2-gpu"
:
[
...
...
test/srt/test_vision_openai_server_a.py
View file @
3e4c7da2
...
...
@@ -8,91 +8,34 @@ import unittest
from
test_vision_openai_server_common
import
*
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
popen_launch_server
,
)
class
TestLlavaServer
(
ImageOpenAITestMixin
):
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
class
TestLlava
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
cls
.
base_url
+=
"/v1"
class
TestQwen2VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen25VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestQwen3VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen3-VL-30B-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.80"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
model
=
"Qwen/Qwen3-VL-30B-A3B-Instruct"
extra_args
=
[
"--cuda-graph-max-bs=4"
]
class
TestQwen2_5_VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen3OmniServer
(
OmniOpenAITestMixin
):
model
=
"Qwen/Qwen3-Omni-30B-A3B-Instruct"
extra_args
=
[
# workaround to fit into H100
"--mem-fraction-static=0.90"
,
"--disable-cuda-graph"
,
"--disable-fast-image-processor"
,
"--grammar-backend=none"
,
]
class
TestVL
M
ContextLength
Issue
(
CustomTestCase
):
class
Test
Qwen2
VLContextLength
Server
(
CustomTestCase
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
...
...
@@ -106,7 +49,6 @@ class TestVLMContextLengthIssue(CustomTestCase):
other_args
=
[
"--context-length"
,
"300"
,
"--mem-fraction-static=0.75"
,
"--cuda-graph-max-bs"
,
"4"
,
],
...
...
@@ -149,208 +91,60 @@ class TestVLMContextLengthIssue(CustomTestCase):
)
# Note(Xinyuan): mllama is not stable for now, skip for CI
# class TestMllamaServer(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# api_key=cls.api_key,
# )
# cls.base_url += "/v1"
# flaky
# class TestMllamaServer(ImageOpenAITestMixin):
# model = "meta-llama/Llama-3.2-11B-Vision-Instruct"
class
TestMinicpmvServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-V-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMinicpmv4Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-V-4"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestInternVL25Server
(
ImageOpenAITestMixin
):
model
=
"OpenGVLab/InternVL2_5-2B"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestInternVL2_5Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"OpenGVLab/InternVL2_5-2B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMiniCPMV4Server
(
ImageOpenAITestMixin
):
model
=
"openbmb/MiniCPM-V-4"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestMinicpmo2_6Server
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-o-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.65"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMiniCPMo26Server
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
model
=
"openbmb/MiniCPM-o-2_6"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestMimoVLServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"XiaomiMiMo/MiMo-VL-7B-RL"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.6"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestGemma3itServer
(
ImageOpenAITestMixin
):
model
=
"google/gemma-3-4b-it"
extra_args
=
[
"--cuda-graph-max-bs=4"
,
]
class
TestVILAServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
revision
=
"6bde1de5964b40e61c802b375fff419edc867506"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--trust-remote-code"
,
"--context-length=65536"
,
f
"--revision=
{
cls
.
revision
}
"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestKimiVLServer
(
ImageOpenAITestMixin
):
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
extra_args
=
[
"--context-length=8192"
,
"--dtype=bfloat16"
,
]
class
TestPhi4MMServer
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
# Manually download LoRA adapter_config.json as it's not downloaded by the model loader by default.
from
huggingface_hub
import
constants
,
snapshot_download
def
test_video_images_chat_completion
(
self
):
# model context length exceeded
pass
snapshot_download
(
"microsoft/Phi-4-multimodal-instruct"
,
allow_patterns
=
[
"**/adapter_config.json"
],
)
cls
.
model
=
"microsoft/Phi-4-multimodal-instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
class
TestGLM41VServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
model
=
"zai-org/GLM-4.1V-9B-Thinking"
extra_args
=
[
"--reasoning-parser=glm45"
,
]
revision
=
"33e62acdd07cd7d6635badd529aa0a3467bb9c6a"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--disable-radix-cache"
,
"--max-loras-per-batch"
,
"2"
,
"--revision"
,
revision
,
"--lora-paths"
,
f
"vision=
{
constants
.
HF_HUB_CACHE
}
/models--microsoft--Phi-4-multimodal-instruct/snapshots/
{
revision
}
/vision-lora"
,
f
"speech=
{
constants
.
HF_HUB_CACHE
}
/models--microsoft--Phi-4-multimodal-instruct/snapshots/
{
revision
}
/speech-lora"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
def
get_vision_request_kwargs
(
self
):
return
{
"extra_body"
:
{
"lora_path"
:
"vision"
,
"top_k"
:
1
,
"top_p"
:
1.0
,
}
}
def
get_audio_request_kwargs
(
self
):
return
{
"extra_body"
:
{
"lora_path"
:
"speech"
,
"top_k"
:
1
,
"top_p"
:
1.0
,
}
}
# This _test_audio_ambient_completion test is way too complicated to pass for a small LLM
def
test_audio_ambient_completion
(
self
):
pass
class
TestQwen2AudioServer
(
AudioOpenAITestMixin
):
model
=
"Qwen/Qwen2-Audio-7B-Instruct"
if
__name__
==
"__main__"
:
...
...
test/srt/test_vision_openai_server_b.py
deleted
100644 → 0
View file @
d88ac9bc
import
unittest
from
test_vision_openai_server_common
import
*
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
popen_launch_server
,
)
class
TestPixtralServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"mistral-community/pixtral-12b"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestMistral3_1Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.75"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestDeepseekVL2Server
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-vl2-small"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestJanusProServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/Janus-Pro-7B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.35"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_images_chat_completion
(
self
):
pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# "--mm-attention-backend",
# "fa3",
# "--cuda-graph-max-bs",
# "4",
# ],
# )
# cls.base_url += "/v1"
class
TestGemma3itServer
(
ImageOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"google/gemma-3-4b-it"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--enable-multimodal"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestGemma3nServer
(
ImageOpenAITestMixin
,
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"google/gemma-3n-E4B-it"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
# This _test_audio_ambient_completion test is way too complicated to pass for a small LLM
def
test_audio_ambient_completion
(
self
):
pass
def
_test_mixed_image_audio_chat_completion
(
self
):
self
.
_test_mixed_image_audio_chat_completion
()
class
TestQwen2AudioServer
(
AudioOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-Audio-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.70"
,
],
)
cls
.
base_url
+=
"/v1"
# Temporarily skip Kimi-VL for CI test due to issue in transformers=4.57.0
# class TestKimiVLServer(ImageOpenAITestMixin):
# @classmethod
# def setUpClass(cls):
# cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--trust-remote-code",
# "--context-length",
# "4096",
# "--dtype",
# "bfloat16",
# "--cuda-graph-max-bs",
# "4",
# ],
# )
# cls.base_url += "/v1"
# def test_video_images_chat_completion(self):
# pass
class
TestGLM41VServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"zai-org/GLM-4.1V-9B-Thinking"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.68"
,
"--cuda-graph-max-bs"
,
"4"
,
"--reasoning-parser"
,
"glm45"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen3OmniServer
(
OmniOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen3-Omni-30B-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
# workaround to fit into H100
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.90"
,
"--disable-cuda-graph"
,
"--disable-fast-image-processor"
,
"--grammar-backend"
,
"none"
,
],
)
cls
.
base_url
+=
"/v1"
if
__name__
==
"__main__"
:
del
(
TestOpenAIMLLMServerBase
,
ImageOpenAITestMixin
,
VideoOpenAITestMixin
,
AudioOpenAITestMixin
,
OmniOpenAITestMixin
,
)
unittest
.
main
()
test/srt/test_vision_openai_server_common.py
View file @
3e4c7da2
...
...
@@ -9,7 +9,12 @@ import requests
from
PIL
import
Image
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.test_utils
import
DEFAULT_URL_FOR_TEST
,
CustomTestCase
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
)
# image
IMAGE_MAN_IRONING_URL
=
"https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.png"
...
...
@@ -24,12 +29,21 @@ AUDIO_BIRD_SONG_URL = "https://raw.githubusercontent.com/sgl-project/sgl-test-fi
class
TestOpenAIMLLMServerBase
(
CustomTestCase
):
model
:
str
extra_args
:
list
=
[]
fixed_args
:
list
=
[
"--trust-remote-code"
,
"--enable-multimodal"
]
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
""
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
None
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
cls
.
extra_args
+
cls
.
fixed_args
,
)
cls
.
base_url
+=
"/v1"
@
classmethod
...
...
@@ -421,7 +435,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
or
"device"
in
video_response
or
"microphone"
in
video_response
),
f
"""
====================== video_response =====================
====================== video_
images
response =====================
{
video_response
}
===========================================================
should contain 'iPod' or 'device' or 'microphone'
...
...
@@ -435,7 +449,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
or
"Steve"
in
video_response
or
"hand"
in
video_response
),
f
"""
====================== video_response =====================
====================== video_
images
response =====================
{
video_response
}
===========================================================
should contain 'man' or 'person' or 'individual' or 'speaker' or 'presenter' or 'Steve' or 'hand'
...
...
@@ -446,7 +460,7 @@ class ImageOpenAITestMixin(TestOpenAIMLLMServerBase):
or
"display"
in
video_response
or
"hold"
in
video_response
),
f
"""
====================== video_response =====================
====================== video_
images
response =====================
{
video_response
}
===========================================================
should contain 'present' or 'examine' or 'display' or 'hold'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment