Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
f11481b9
Unverified
Commit
f11481b9
authored
May 19, 2025
by
fzyzcjy
Committed by
GitHub
May 18, 2025
Browse files
Add 4-GPU runner tests and split existing tests (#6383)
parent
9d24c3ff
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
414 additions
and
370 deletions
+414
-370
.github/workflows/pr-test.yml
.github/workflows/pr-test.yml
+19
-0
test/srt/run_suite.py
test/srt/run_suite.py
+7
-4
test/srt/test_pp_single_node.py
test/srt/test_pp_single_node.py
+1
-1
test/srt/test_vision_openai_server_a.py
test/srt/test_vision_openai_server_a.py
+187
-0
test/srt/test_vision_openai_server_b.py
test/srt/test_vision_openai_server_b.py
+200
-0
test/srt/test_vision_openai_server_common.py
test/srt/test_vision_openai_server_common.py
+0
-365
No files found.
.github/workflows/pr-test.yml
View file @
f11481b9
...
@@ -89,6 +89,25 @@ jobs:
...
@@ -89,6 +89,25 @@ jobs:
cd test/srt
cd test/srt
python3 run_suite.py --suite per-commit-2-gpu
python3 run_suite.py --suite per-commit-2-gpu
unittest-test-backend-4-gpu
:
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft ==
false
needs
:
[
unit-test-frontend
,
unit-test-backend-2-gpu
]
runs-on
:
4-gpu-runner
steps
:
-
name
:
Checkout code
uses
:
actions/checkout@v4
-
name
:
Install dependencies
run
:
|
bash scripts/ci_install_dependency.sh
-
name
:
Run test
timeout-minutes
:
20
run
:
|
cd test/srt
python3 run_suite.py --suite per-commit-4-gpu
unittest-test-backend-8-gpu
:
unittest-test-backend-8-gpu
:
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
if
:
(github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
github.event.pull_request.draft ==
false
github.event.pull_request.draft ==
false
...
...
test/srt/run_suite.py
View file @
f11481b9
...
@@ -81,7 +81,8 @@ suites = {
...
@@ -81,7 +81,8 @@ suites = {
TestFile
(
"test_vertex_endpoint.py"
,
31
),
TestFile
(
"test_vertex_endpoint.py"
,
31
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vision_chunked_prefill.py"
,
175
),
TestFile
(
"test_vlm_accuracy.py"
,
60
),
TestFile
(
"test_vlm_accuracy.py"
,
60
),
TestFile
(
"test_vision_openai_server.py"
,
637
),
TestFile
(
"test_vision_openai_server_a.py"
,
700
),
TestFile
(
"test_vision_openai_server_b.py"
,
700
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"test_w8a8_quantization.py"
,
46
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
TestFile
(
"models/lora/test_lora_cuda_graph.py"
,
250
),
],
],
...
@@ -104,17 +105,19 @@ suites = {
...
@@ -104,17 +105,19 @@ suites = {
"per-commit-2-gpu-amd"
:
[
"per-commit-2-gpu-amd"
:
[
TestFile
(
"test_mla_tp.py"
,
170
),
TestFile
(
"test_mla_tp.py"
,
170
),
],
],
"per-commit-4-gpu"
:
[
TestFile
(
"test_local_attn.py"
,
250
),
TestFile
(
"test_pp_single_node.py"
,
150
),
],
"per-commit-8-gpu"
:
[
"per-commit-8-gpu"
:
[
# Disabled deepep tests temporarily because it takes too much time.
# Disabled deepep tests temporarily because it takes too much time.
# TODO: re-enable them after reducing the test time with compilation cache and smaller models.
# TODO: re-enable them after reducing the test time with compilation cache and smaller models.
# TestFile("test_deepep_intranode.py", 50),
# TestFile("test_deepep_intranode.py", 50),
# TestFile("test_deepep_low_latency.py", 50),
# TestFile("test_deepep_low_latency.py", 50),
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
TestFile
(
"test_disaggregation.py"
,
210
),
# TestFile("test_disaggregation.py", 210), # disabled since we have different_tp test
TestFile
(
"test_local_attn.py"
,
250
),
TestFile
(
"test_disaggregation_different_tp.py"
,
210
),
TestFile
(
"test_disaggregation_different_tp.py"
,
210
),
TestFile
(
"test_full_deepseek_v3.py"
,
250
),
TestFile
(
"test_full_deepseek_v3.py"
,
250
),
TestFile
(
"test_pp_single_node.py"
,
150
),
],
],
"per-commit-8-gpu-amd"
:
[
"per-commit-8-gpu-amd"
:
[
TestFile
(
"test_full_deepseek_v3.py"
,
250
),
TestFile
(
"test_full_deepseek_v3.py"
,
250
),
...
...
test/srt/test_pp_single_node.py
View file @
f11481b9
...
@@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase):
...
@@ -34,7 +34,7 @@ class TestPPAccuracy(unittest.TestCase):
"--tp-size"
,
"--tp-size"
,
2
,
2
,
"--pp-size"
,
"--pp-size"
,
4
,
2
,
"--chunked-prefill-size"
,
"--chunked-prefill-size"
,
256
,
256
,
],
],
...
...
test/srt/test_vision_openai_server_a.py
0 → 100644
View file @
f11481b9
"""
Usage:
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
"""
from
test_vision_openai_server_common
import
*
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
)
class
TestQwen2VLServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen2_5_VLServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestVLMContextLengthIssue
(
CustomTestCase
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--context-length"
,
"300"
,
"--mem-fraction-static=0.80"
,
],
)
cls
.
base_url
+=
"/v1"
@
classmethod
def
tearDownClass
(
cls
):
kill_process_tree
(
cls
.
process
.
pid
)
def
test_single_image_chat_completion
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
with
self
.
assertRaises
(
openai
.
BadRequestError
)
as
cm
:
client
.
chat
.
completions
.
create
(
model
=
"default"
,
messages
=
[
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
IMAGE_MAN_IRONING_URL
},
},
{
"type"
:
"text"
,
"text"
:
"Give a lengthy description of this picture"
,
},
],
},
],
temperature
=
0
,
)
# context length is checked first, then max_req_input_len, which is calculated from the former
assert
(
"Multimodal prompt is too long after expanding multimodal tokens."
in
str
(
cm
.
exception
)
or
"is longer than the model's context length"
in
str
(
cm
.
exception
)
)
class
TestMllamaServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"meta-llama/Llama-3.2-11B-Vision-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestMinicpmvServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-V-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestInternVL2_5Server
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"OpenGVLab/InternVL2_5-2B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
],
)
cls
.
base_url
+=
"/v1"
class
TestMinicpmoServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-o-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.7"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_audio_chat_completion
(
self
):
self
.
_test_audio_speech_completion
()
self
.
_test_audio_ambient_completion
()
if
__name__
==
"__main__"
:
unittest
.
main
()
test/srt/test_vision_openai_server_b.py
0 → 100644
View file @
f11481b9
from
test_vision_openai_server_common
import
*
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
CustomTestCase
,
popen_launch_server
,
)
class
TestPixtralServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"mistral-community/pixtral-12b"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.73"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestMistral3_1Server
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.8"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestDeepseekVL2Server
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-vl2-small"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestDeepseekVL2TinyServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-vl2-tiny"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestJanusProServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/Janus-Pro-7B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
def
test_single_image_chat_completion
(
self
):
# Skip this test because it is flaky
pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# ],
# )
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# pass
class
TestGemma3itServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"google/gemma-3-4b-it"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.75"
,
"--enable-multimodal"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestKimiVLServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
"--dtype"
,
"bfloat16"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
if
__name__
==
"__main__"
:
unittest
.
main
()
test/srt/test_vision_openai_server.py
→
test/srt/test_vision_openai_server
_common
.py
View file @
f11481b9
"""
Usage:
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_mixed_batch
python3 -m unittest test_vision_openai_server.TestOpenAIVisionServer.test_multi_images_chat_completion
"""
import
base64
import
base64
import
io
import
io
import
json
import
json
...
@@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase):
...
@@ -472,362 +466,3 @@ class TestOpenAIVisionServer(CustomTestCase):
def
test_audio_chat_completion
(
self
):
def
test_audio_chat_completion
(
self
):
pass
pass
class
TestQwen2VLServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen2_5_VLServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2.5-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestVLMContextLengthIssue
(
CustomTestCase
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen2-VL-7B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--context-length"
,
"300"
,
"--mem-fraction-static=0.80"
,
],
)
cls
.
base_url
+=
"/v1"
@
classmethod
def
tearDownClass
(
cls
):
kill_process_tree
(
cls
.
process
.
pid
)
def
test_single_image_chat_completion
(
self
):
client
=
openai
.
Client
(
api_key
=
self
.
api_key
,
base_url
=
self
.
base_url
)
with
self
.
assertRaises
(
openai
.
BadRequestError
)
as
cm
:
client
.
chat
.
completions
.
create
(
model
=
"default"
,
messages
=
[
{
"role"
:
"user"
,
"content"
:
[
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
IMAGE_MAN_IRONING_URL
},
},
{
"type"
:
"text"
,
"text"
:
"Give a lengthy description of this picture"
,
},
],
},
],
temperature
=
0
,
)
# context length is checked first, then max_req_input_len, which is calculated from the former
assert
(
"Multimodal prompt is too long after expanding multimodal tokens."
in
str
(
cm
.
exception
)
or
"is longer than the model's context length"
in
str
(
cm
.
exception
)
)
class
TestMllamaServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"meta-llama/Llama-3.2-11B-Vision-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestMinicpmvServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-V-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestInternVL2_5Server
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"OpenGVLab/InternVL2_5-2B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
],
)
cls
.
base_url
+=
"/v1"
class
TestMinicpmoServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"openbmb/MiniCPM-o-2_6"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.7"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_audio_chat_completion
(
self
):
self
.
_test_audio_speech_completion
()
self
.
_test_audio_ambient_completion
()
class
TestPixtralServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"mistral-community/pixtral-12b"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.73"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestMistral3_1Server
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"unsloth/Mistral-Small-3.1-24B-Instruct-2503"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.8"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestDeepseekVL2Server
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-vl2-small"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestDeepseekVL2TinyServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/deepseek-vl2-tiny"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestJanusProServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"deepseek-ai/Janus-Pro-7B"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.4"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
def
test_single_image_chat_completion
(
self
):
# Skip this test because it is flaky
pass
## Skip for ci test
# class TestLlama4Server(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--chat-template",
# "llama-4",
# "--mem-fraction-static",
# "0.8",
# "--tp-size=8",
# "--context-length=8192",
# ],
# )
# cls.base_url += "/v1"
# def test_video_chat_completion(self):
# pass
class
TestGemma3itServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"google/gemma-3-4b-it"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--mem-fraction-static"
,
"0.75"
,
"--enable-multimodal"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
class
TestKimiVLServer
(
TestOpenAIVisionServer
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"moonshotai/Kimi-VL-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--trust-remote-code"
,
"--context-length"
,
"4096"
,
"--dtype"
,
"bfloat16"
,
],
)
cls
.
base_url
+=
"/v1"
def
test_video_chat_completion
(
self
):
pass
if
__name__
==
"__main__"
:
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment