change / sglang · Commits

Commit 64d1505c (Unverified)
authored Oct 08, 2025 by Mick, committed by GitHub Oct 07, 2025

ci: unify the model launch method of nightly ci (#11230)
parent f3764c26

Changes: 5. Showing 5 changed files with 197 additions and 158 deletions.

  python/sglang/test/test_utils.py                 +18   -8
  test/srt/test_nightly_text_models_gsm8k_eval.py  +36  -27
  test/srt/test_nightly_text_models_perf.py        +87  -91
  test/srt/test_nightly_vlms_mmmu_eval.py          +23  -18
  test/srt/test_nightly_vlms_perf.py               +33  -14
python/sglang/test/test_utils.py

```diff
@@ -20,7 +20,6 @@ from functools import partial
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any, Awaitable, Callable, List, Optional, Tuple
-from urllib.parse import quote
 
 import aiohttp
 import numpy as np
@@ -1652,15 +1651,26 @@ def _ensure_remove_suffix(text: str, suffix: str):
     return text.removesuffix(suffix)
 
 
-class ModelDeploySetup:
-    def __init__(self, model_path: str, extra_args: List[str] = []):
+class ModelLaunchSettings:
+    def __init__(
+        self,
+        model_path: str,
+        tp_size: int = 1,
+        extra_args: Optional[List[str]] = None,
+        env: Optional[dict] = None,
+    ):
         self.model_path = model_path
-        if "--enable-multimodal" not in extra_args:
-            extra_args.append("--enable-multimodal")
-        if "--trust-remote-code" not in extra_args:
-            extra_args.append("--trust-remote-code")
-        self.extra_args = extra_args
+        self.tp_size = tp_size
+        self.extra_args = list(extra_args) if extra_args else []
+        self.env = env
+
+        if self.tp_size > 1 and "--tp" not in self.extra_args:
+            self.extra_args.extend(["--tp", str(self.tp_size)])
+
+        fixed_args = ["--enable-multimodal", "--trust-remote-code"]
+        for fixed_arg in fixed_args:
+            if fixed_arg not in self.extra_args:
+                self.extra_args.append(fixed_arg)
 
 
 class ModelEvalMetrics:
```
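The behavioral contract of the new class is easiest to see by exercising it. A minimal sketch, with the class body copied verbatim from the hunk above so it runs standalone; the asserts encode my reading of the diff, not anything in the commit:

```python
from typing import List, Optional


class ModelLaunchSettings:  # copied from the hunk above
    def __init__(
        self,
        model_path: str,
        tp_size: int = 1,
        extra_args: Optional[List[str]] = None,
        env: Optional[dict] = None,
    ):
        self.model_path = model_path
        self.tp_size = tp_size
        self.extra_args = list(extra_args) if extra_args else []
        self.env = env

        if self.tp_size > 1 and "--tp" not in self.extra_args:
            self.extra_args.extend(["--tp", str(self.tp_size)])

        fixed_args = ["--enable-multimodal", "--trust-remote-code"]
        for fixed_arg in fixed_args:
            if fixed_arg not in self.extra_args:
                self.extra_args.append(fixed_arg)


# tp_size > 1 is folded into the CLI flags exactly once, at construction time.
s = ModelLaunchSettings("meta-llama/Llama-3.1-70B-Instruct", tp_size=2)
assert s.extra_args == ["--tp", "2", "--enable-multimodal", "--trust-remote-code"]

# Unlike the old ModelDeploySetup (extra_args: List[str] = [] plus in-place
# append), the constructor copies, so a caller's list is never mutated and no
# state leaks across instances through the mutable default argument.
shared = ["--mem-fraction-static=0.7"]
ModelLaunchSettings("Qwen/Qwen2.5-VL-7B-Instruct", extra_args=shared)
ModelLaunchSettings("google/gemma-3-27b-it", extra_args=shared)
assert shared == ["--mem-fraction-static=0.7"]
```

Note that the `env` field is stored but none of the call sites visible in this diff pass it yet.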
test/srt/test_nightly_text_models_gsm8k_eval.py

```diff
@@ -12,6 +12,7 @@ from sglang.test.test_utils import (
     DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2,
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
+    ModelLaunchSettings,
     check_evaluation_test_results,
     parse_models,
     popen_launch_server,
@@ -44,12 +45,19 @@ MODEL_SCORE_THRESHOLDS = {
 class TestNightlyGsm8KEval(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.model_groups = [
-            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1), False, False),
-            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True),
-            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False),
-            (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True),
-        ]
+        cls.models = []
+        models_tp1 = parse_models(
+            DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1
+        ) + parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1)
+        for model_path in models_tp1:
+            cls.models.append(ModelLaunchSettings(model_path, tp_size=1))
+        models_tp2 = parse_models(
+            DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2
+        ) + parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2)
+        for model_path in models_tp2:
+            cls.models.append(ModelLaunchSettings(model_path, tp_size=2))
         cls.base_url = DEFAULT_URL_FOR_TEST
 
     def test_mgsm_en_all_models(self):
@@ -58,26 +66,24 @@ class TestNightlyGsm8KEval(unittest.TestCase):
         )
 
         is_first = True
         all_results = []
-        model_count = 0
-        for model_group, is_fp8, is_tp2 in self.model_groups:
-            for model in model_group:
-                model_count += 1
-                with self.subTest(model=model):
-                    other_args = ["--tp", "2"] if is_tp2 else []
-                    if model == "meta-llama/Llama-3.1-70B-Instruct":
+        for model_setup in self.models:
+            with self.subTest(model=model_setup.model_path):
+                other_args = list(model_setup.extra_args)
+                if model_setup.model_path == "meta-llama/Llama-3.1-70B-Instruct":
                     other_args.extend(["--mem-fraction-static", "0.9"])
                 process = popen_launch_server(
-                    model=model,
+                    model=model_setup.model_path,
                     other_args=other_args,
                     base_url=self.base_url,
                     timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                 )
                 try:
                     args = SimpleNamespace(
                         base_url=self.base_url,
-                        model=model,
+                        model=model_setup.model_path,
                         eval_name="mgsm_en",
                         num_examples=None,
                         num_threads=1024,
@@ -85,14 +91,17 @@ class TestNightlyGsm8KEval(unittest.TestCase):
                     metrics = run_eval(args)
                     print(
-                        f"{'=' * 42}\n{model} - metrics={metrics} score={metrics['score']}\n{'=' * 42}\n"
+                        f"{'=' * 42}\n{model_setup.model_path} - metrics={metrics} score={metrics['score']}\n{'=' * 42}\n"
                     )
 
-                    write_results_to_json(model, metrics, "w" if is_first else "a")
+                    write_results_to_json(
+                        model_setup.model_path, metrics, "w" if is_first else "a"
+                    )
                     is_first = False
 
                     # 0.0 for empty latency
-                    all_results.append((model, metrics["score"], 0.0))
+                    all_results.append((model_setup.model_path, metrics["score"], 0.0))
                 finally:
                     kill_process_tree(process.pid)
 
         try:
@@ -107,7 +116,7 @@ class TestNightlyGsm8KEval(unittest.TestCase):
             all_results,
             self.__class__.__name__,
             model_accuracy_thresholds=MODEL_SCORE_THRESHOLDS,
-            model_count=model_count,
+            model_count=len(self.models),
         )
```
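The rewrite flattens the old `(models, is_fp8, is_tp2)` tuples into one `ModelLaunchSettings` per model, resolving tensor parallelism once in the constructor instead of at each launch site; the `is_fp8` tag disappears, with FP8 model lists simply concatenated into the tp1/tp2 buckets. A sketch of the mapping, assuming `parse_models` splits a comma-separated model list (the model names below are placeholders, and the import assumes a checkout containing this commit):

```python
from sglang.test.test_utils import ModelLaunchSettings  # added by this commit


def parse_models(model_string: str):
    # Assumed behavior of sglang's parse_models: split a comma-separated list.
    return [m.strip() for m in model_string.split(",") if m.strip()]


# Old shape: model groups tagged with (is_fp8, is_tp2) flags, with is_tp2
# re-interpreted into "--tp 2" inside the test loop on every launch.
old_groups = [
    (parse_models("org/model-a,org/model-b"), False, False),  # placeholders
    (parse_models("org/model-c"), False, True),
]

# New shape: a flat list of launch settings; "--tp N" is baked in up front.
models = [
    ModelLaunchSettings(path, tp_size=2 if is_tp2 else 1)
    for group, _is_fp8, is_tp2 in old_groups
    for path in group
]
assert models[2].extra_args[:2] == ["--tp", "2"]
```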
test/srt/test_nightly_text_models_perf.py

```diff
@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
+    ModelLaunchSettings,
     _parse_int_list_env,
     is_in_ci,
     parse_models,
@@ -21,14 +22,16 @@ PROFILE_DIR = "performance_profiles_text_models"
 class TestNightlyTextModelsPerformance(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.model_groups = [
-            (parse_models("meta-llama/Llama-3.1-8B-Instruct"), False, False),
-            (parse_models("Qwen/Qwen2-57B-A14B-Instruct"), False, True),
+        cls.models = []
+        # TODO: replace with DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 or other model lists
+        for model_path in parse_models("meta-llama/Llama-3.1-8B-Instruct"):
+            cls.models.append(ModelLaunchSettings(model_path, tp_size=1))
+        for model_path in parse_models("Qwen/Qwen2-57B-A14B-Instruct"):
+            cls.models.append(ModelLaunchSettings(model_path, tp_size=2))
         # (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1), False, False),
         # (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP2), False, True),
         # (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1), True, False),
         # (parse_models(DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2), True, True),
-        ]
         cls.base_url = DEFAULT_URL_FOR_TEST
         cls.batch_sizes = [1, 1, 8, 16, 64]
         cls.input_lens = tuple(_parse_int_list_env("NIGHTLY_INPUT_LENS", "4096"))
@@ -39,34 +42,29 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
     def test_bench_one_batch(self):
         all_benchmark_results = []
-        for model_group, is_fp8, is_tp2 in self.model_groups:
-            for model in model_group:
-                benchmark_results = []
-                with self.subTest(model=model):
+        for model_setup in self.models:
+            benchmark_results = []
+            with self.subTest(model=model_setup.model_path):
                 process = popen_launch_server(
-                    model=model,
+                    model=model_setup.model_path,
                     base_url=self.base_url,
-                    other_args=["--tp", "2"] if is_tp2 else [],
+                    other_args=model_setup.extra_args,
                     timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                 )
                 try:
                     profile_filename = (
-                        f"{model.replace('/', '_')}_{int(time.time())}"
+                        f"{model_setup.model_path.replace('/', '_')}_{int(time.time())}"
                     )
                     profile_path_prefix = os.path.join(PROFILE_DIR, profile_filename)
-                    json_output_file = (
-                        f"results_{model.replace('/', '_')}_{int(time.time())}.json"
-                    )
+                    json_output_file = f"results_{model_setup.model_path.replace('/', '_')}_{int(time.time())}.json"
                     command = [
                         "python3",
                         "-m",
                         "sglang.bench_one_batch_server",
                         "--model",
-                        model,
+                        model_setup.model_path,
                         "--base-url",
                         self.base_url,
                         "--batch-size",
@@ -89,7 +87,7 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
                     if result.returncode != 0:
                         print(
-                            f"Error running benchmark for {model} with batch size:"
+                            f"Error running benchmark for {model_setup.model_path} with batch size:"
                         )
                         print(result.stderr)
                         # Continue to next batch size even if one fails
@@ -115,9 +113,7 @@ class TestNightlyTextModelsPerformance(unittest.TestCase):
                         # Clean up JSON file
                         os.remove(json_output_file)
                     else:
-                        print(
-                            f"Warning: JSON output file {json_output_file} not found"
-                        )
+                        print(f"Warning: JSON output file {json_output_file} not found")
                 finally:
                     kill_process_tree(process.pid)
```
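Downstream, nothing about the benchmark invocation changes except where the model name and flags come from. A self-contained sketch of the visible part of the command each iteration builds; the URL and batch size are placeholder values, and the flags after `--batch-size` are truncated in the diff above, so the sketch stops there:

```python
import subprocess

# Placeholder values standing in for the class attributes used by the test.
model_path = "meta-llama/Llama-3.1-8B-Instruct"
base_url = "http://127.0.0.1:30000"
batch_size = 8

# Mirrors the command list assembled in test_bench_one_batch.
command = [
    "python3",
    "-m",
    "sglang.bench_one_batch_server",
    "--model",
    model_path,
    "--base-url",
    base_url,
    "--batch-size",
    str(batch_size),
]

# Same invocation and error handling pattern as the test body.
result = subprocess.run(command, capture_output=True, text=True)
if result.returncode != 0:
    print(f"Error running benchmark for {model_path} with batch size:")
    print(result.stderr)
```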
test/srt/test_nightly_vlms_mmmu_eval.py

```diff
@@ -1,6 +1,7 @@
 import json
 import unittest
 import warnings
+from functools import partial
 from types import SimpleNamespace
 
 from sglang.srt.utils import kill_process_tree
@@ -8,8 +9,8 @@ from sglang.test.run_eval import run_eval
 from sglang.test.test_utils import (
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
-    ModelDeploySetup,
     ModelEvalMetrics,
+    ModelLaunchSettings,
     check_evaluation_test_results,
     popen_launch_server,
     write_results_to_json,
@@ -17,25 +18,29 @@ from sglang.test.test_utils import (
 MODEL_THRESHOLDS = {
     # Conservative thresholds on 100 MMMU samples, especially for latency thresholds
-    ModelDeploySetup("deepseek-ai/deepseek-vl2-small"): ModelEvalMetrics(0.330, 56.1),
-    ModelDeploySetup("deepseek-ai/Janus-Pro-7B"): ModelEvalMetrics(0.285, 39.9),
-    ModelDeploySetup("Efficient-Large-Model/NVILA-Lite-2B-hf-0626"): ModelEvalMetrics(0.305, 23.8),
-    ModelDeploySetup("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
-    ModelDeploySetup("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 15.3),
-    ModelDeploySetup("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
-    ModelDeploySetup("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(0.330, 22.3),
-    ModelDeploySetup("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
-    ModelDeploySetup("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.270, 24.5),
-    ModelDeploySetup("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 14.0),
-    ModelDeploySetup("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
-    ModelDeploySetup("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
-    ModelDeploySetup("unsloth/Mistral-Small-3.1-24B-Instruct-2503"): ModelEvalMetrics(0.310, 16.7),
-    ModelDeploySetup("XiaomiMiMo/MiMo-VL-7B-RL"): ModelEvalMetrics(0.28, 32.0),
-    ModelDeploySetup("zai-org/GLM-4.1V-9B-Thinking"): ModelEvalMetrics(0.280, 30.4),
+    ModelLaunchSettings("deepseek-ai/deepseek-vl2-small"): ModelEvalMetrics(
+        0.330, 56.1
+    ),
+    ModelLaunchSettings("deepseek-ai/Janus-Pro-7B"): ModelEvalMetrics(0.285, 40.3),
+    ModelLaunchSettings(
+        "Efficient-Large-Model/NVILA-Lite-2B-hf-0626"
+    ): ModelEvalMetrics(0.305, 23.8),
+    ModelLaunchSettings("google/gemma-3-4b-it"): ModelEvalMetrics(0.360, 10.9),
+    ModelLaunchSettings("google/gemma-3n-E4B-it"): ModelEvalMetrics(0.360, 15.3),
+    ModelLaunchSettings("mistral-community/pixtral-12b"): ModelEvalMetrics(0.360, 16.6),
+    ModelLaunchSettings("moonshotai/Kimi-VL-A3B-Instruct"): ModelEvalMetrics(
+        0.330, 22.3
+    ),
+    ModelLaunchSettings("openbmb/MiniCPM-o-2_6"): ModelEvalMetrics(0.330, 29.3),
+    ModelLaunchSettings("openbmb/MiniCPM-v-2_6"): ModelEvalMetrics(0.270, 24.5),
+    ModelLaunchSettings("OpenGVLab/InternVL2_5-2B"): ModelEvalMetrics(0.300, 14.0),
+    ModelLaunchSettings("Qwen/Qwen2-VL-7B-Instruct"): ModelEvalMetrics(0.310, 83.3),
+    ModelLaunchSettings("Qwen/Qwen2.5-VL-7B-Instruct"): ModelEvalMetrics(0.340, 31.9),
+    ModelLaunchSettings("unsloth/Mistral-Small-3.1-24B-Instruct-2503"): ModelEvalMetrics(0.310, 16.7),
+    ModelLaunchSettings("XiaomiMiMo/MiMo-VL-7B-RL"): ModelEvalMetrics(0.28, 32.0),
+    ModelLaunchSettings("zai-org/GLM-4.1V-9B-Thinking"): ModelEvalMetrics(0.280, 30.4),
 }
```

Note: besides the key-class rename, the Janus-Pro-7B latency threshold is loosened from 39.9 to 40.3.
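One subtlety of this table: `ModelLaunchSettings` defines only `__init__` (see the first file in this commit), so these dictionary keys hash by object identity. A freshly constructed `ModelLaunchSettings("google/gemma-3-4b-it")` would not hit the table; a lookup has to iterate, or reuse the very instances used as keys. A hypothetical helper, assuming it runs in this test module's namespace where `MODEL_THRESHOLDS` is defined:

```python
def threshold_for(model_path: str):
    # Match on model_path because the key objects hash by identity, not value.
    for settings, metrics in MODEL_THRESHOLDS.items():
        if settings.model_path == model_path:
            return metrics
    raise KeyError(model_path)


# threshold_for("google/gemma-3-4b-it") returns ModelEvalMetrics(0.360, 10.9)
```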
test/srt/test_nightly_vlms_perf.py

```diff
@@ -8,6 +8,7 @@ from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
     DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
     DEFAULT_URL_FOR_TEST,
+    ModelLaunchSettings,
     _parse_int_list_env,
     is_in_ci,
     parse_models,
@@ -19,8 +20,13 @@ PROFILE_DIR = "performance_profiles_vlms"
 MODEL_DEFAULTS = [
     # Keep conservative defaults. Can be overridden by env NIGHTLY_VLM_MODELS
-    "Qwen/Qwen2.5-VL-7B-Instruct",
-    "google/gemma-3-27b-it",
+    ModelLaunchSettings(
+        "Qwen/Qwen2.5-VL-7B-Instruct",
+        extra_args=["--mem-fraction-static=0.7"],
+    ),
+    ModelLaunchSettings(
+        "google/gemma-3-27b-it",
+    ),
     # "OpenGVLab/InternVL2_5-2B",
     # buggy in official transformers impl
     # "openbmb/MiniCPM-V-2_6",
@@ -33,9 +39,18 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
         warnings.filterwarnings(
             "ignore", category=ResourceWarning, message="unclosed.*socket"
         )
-        cls.models = parse_models(
-            os.environ.get("NIGHTLY_VLM_MODELS", ",".join(MODEL_DEFAULTS))
-        )
+        nightly_vlm_models_str = os.environ.get("NIGHTLY_VLM_MODELS")
+        if nightly_vlm_models_str:
+            cls.models = []
+            model_paths = parse_models(nightly_vlm_models_str)
+            for model_path in model_paths:
+                cls.models.append(
+                    ModelLaunchSettings(model_path, extra_args=VLM_EXTRA_ARGS)
+                )
+        else:
+            cls.models = MODEL_DEFAULTS
         cls.base_url = DEFAULT_URL_FOR_TEST
         cls.batch_sizes = _parse_int_list_env("NIGHTLY_VLM_BATCH_SIZES", "1,1,2,8,16")
@@ -46,29 +61,31 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
     def test_bench_one_batch(self):
         all_benchmark_results = []
-        for model in self.models:
+        for model_setup in self.models:
             benchmark_results = []
-            with self.subTest(model=model):
+            with self.subTest(model=model_setup.model_path):
                 process = popen_launch_server(
-                    model=model,
+                    model=model_setup.model_path,
                     base_url=self.base_url,
-                    other_args=["--mem-fraction-static=0.7"],
+                    other_args=model_setup.extra_args,
                     timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                 )
                 try:
                     # Run bench_one_batch_server against the launched server
-                    profile_filename = f"{model.replace('/', '_')}"
+                    profile_filename = f"{model_setup.model_path.replace('/', '_')}"
                     # path for this run
                     profile_path_prefix = os.path.join(PROFILE_DIR, profile_filename)
                     # JSON output file for this model
-                    json_output_file = f"results_{model.replace('/', '_')}.json"
+                    json_output_file = (
+                        f"results_{model_setup.model_path.replace('/', '_')}.json"
+                    )
                     command = [
                         "python3",
                         "-m",
                         "sglang.bench_one_batch_server",
-                        f"--model={model}",
+                        f"--model={model_setup.model_path}",
                         "--base-url",
                         self.base_url,
                         "--batch-size",
@@ -91,12 +108,14 @@ class TestNightlyVLMModelsPerformance(unittest.TestCase):
                     result = subprocess.run(command, capture_output=True, text=True)
                     if result.returncode != 0:
-                        print(f"Error running benchmark for {model} with batch size:")
+                        print(
+                            f"Error running benchmark for {model_setup.model_path} with batch size:"
+                        )
                         print(result.stderr)
                         # Continue to next batch size even if one fails
                         continue
-                    print(f"Output for {model} with batch size:")
+                    print(f"Output for {model_setup.model_path} with batch size:")
                     print(result.stdout)
 
                     # Load and deserialize JSON results
```
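With this change, setting `NIGHTLY_VLM_MODELS` swaps the `MODEL_DEFAULTS` settings objects for freshly built ones that use `VLM_EXTRA_ARGS` (defined elsewhere in the file and not shown in the diff). A sketch of the selection logic in isolation, with `VLM_EXTRA_ARGS` stubbed since its real value is not visible here:

```python
import os

from sglang.test.test_utils import ModelLaunchSettings, parse_models

VLM_EXTRA_ARGS = ["--mem-fraction-static=0.7"]  # stub; real value not in the diff

MODEL_DEFAULTS = [
    ModelLaunchSettings(
        "Qwen/Qwen2.5-VL-7B-Instruct",
        extra_args=["--mem-fraction-static=0.7"],
    ),
    ModelLaunchSettings("google/gemma-3-27b-it"),
]

# Same branch structure as the new setUpClass above.
nightly_vlm_models_str = os.environ.get("NIGHTLY_VLM_MODELS")
if nightly_vlm_models_str:
    models = [
        ModelLaunchSettings(model_path, extra_args=VLM_EXTRA_ARGS)
        for model_path in parse_models(nightly_vlm_models_str)
    ]
else:
    models = MODEL_DEFAULTS

# e.g. NIGHTLY_VLM_MODELS="org/model-a,org/model-b" python3 sketch.py
print([m.model_path for m in models])
```

The old code could pass `MODEL_DEFAULTS` straight to `",".join(...)` because its entries were strings; now that they are settings objects, the env-var path and the default path have to be handled separately.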