Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
158e8f1e
Unverified
Commit
158e8f1e
authored
Aug 25, 2024
by
Mingyi
Committed by
GitHub
Aug 25, 2024
Browse files
improve the threshold and ports in tests (#1215)
parent
d3efcb39
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
122 additions
and
86 deletions
+122
-86
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+4
-8
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
.../sampling/penaltylib/test_srt_endpoint_with_penalizers.py
+4
-3
test/srt/test_chunked_prefill.py
test/srt/test_chunked_prefill.py
+4
-3
test/srt/test_embedding_openai_server.py
test/srt/test_embedding_openai_server.py
+10
-3
test/srt/test_eval_accuracy_large.py
test/srt/test_eval_accuracy_large.py
+4
-4
test/srt/test_eval_accuracy_large_chunked_prefill.py
test/srt/test_eval_accuracy_large_chunked_prefill.py
+4
-3
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
+4
-3
test/srt/test_eval_accuracy_mini.py
test/srt/test_eval_accuracy_mini.py
+6
-3
test/srt/test_large_max_new_tokens.py
test/srt/test_large_max_new_tokens.py
+4
-3
test/srt/test_moe_serving_throughput.py
test/srt/test_moe_serving_throughput.py
+11
-18
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+7
-3
test/srt/test_serving_throughput.py
test/srt/test_serving_throughput.py
+13
-9
test/srt/test_skip_tokenizer_init.py
test/srt/test_skip_tokenizer_init.py
+7
-3
test/srt/test_srt_endpoint.py
test/srt/test_srt_endpoint.py
+6
-3
test/srt/test_torch_compile.py
test/srt/test_torch_compile.py
+7
-3
test/srt/test_triton_attn_backend.py
test/srt/test_triton_attn_backend.py
+7
-3
test/srt/test_update_weights.py
test/srt/test_update_weights.py
+6
-3
test/srt/test_vision_openai_server.py
test/srt/test_vision_openai_server.py
+14
-8
No files found.
python/sglang/test/test_utils.py
View file @
158e8f1e
...
...
@@ -23,18 +23,14 @@ from sglang.utils import get_exception_traceback
DEFAULT_MODEL_NAME_FOR_TEST
=
"meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MOE_MODEL_NAME_FOR_TEST
=
"mistralai/Mixtral-8x7B-Instruct-v0.1"
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
=
600
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
5157
DEFAULT_URL_FOR_MOE_TEST
=
"http://127.0.0.1:6157"
DEFAULT_URL_FOR_ACCURACY_TEST
=
"http://127.0.0.1:7157"
DEFAULT_URL_FOR_UNIT_TEST
=
"http://127.0.0.1:8157"
DEFAULT_URL_FOR_E2E_TEST
=
"http://127.0.0.1:9157"
DEFAULT_URL_FOR_TEST
=
"http://127.0.0.1:6157"
else
:
DEFAULT_URL_FOR_MOE_TEST
=
"http://127.0.0.1:1157"
DEFAULT_URL_FOR_ACCURACY_TEST
=
"http://127.0.0.1:1257"
DEFAULT_URL_FOR_UNIT_TEST
=
"http://127.0.0.1:1357"
DEFAULT_URL_FOR_E2E_TEST
=
"http://127.0.0.1:1457"
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
1157
DEFAULT_URL_FOR_TEST
=
"http://127.0.0.1:2157"
def
call_generate_lightllm
(
prompt
,
temperature
,
max_tokens
,
stop
=
None
,
url
=
None
):
...
...
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
View file @
158e8f1e
...
...
@@ -7,7 +7,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
(
"--random-seed"
,
"0"
,
...
...
test/srt/test_chunked_prefill.py
View file @
158e8f1e
...
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -20,11 +21,11 @@ class TestChunkedPrefill(unittest.TestCase):
other_args
+=
[
"--enable-mixed-chunk"
]
model
=
DEFAULT_MODEL_NAME_FOR_TEST
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
base_url
=
DEFAULT_URL_FOR_TEST
process
=
popen_launch_server
(
model
,
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
)
...
...
test/srt/test_embedding_openai_server.py
View file @
158e8f1e
...
...
@@ -4,17 +4,24 @@ import openai
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
DEFAULT_URL_FOR_UNIT_TEST
,
popen_launch_server
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
class
TestOpenAIServer
(
unittest
.
TestCase
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"intfloat/e5-mistral-7b-instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
api_key
=
cls
.
api_key
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
cls
.
base_url
+=
"/v1"
cls
.
tokenizer
=
get_tokenizer
(
cls
.
model
)
...
...
test/srt/test_eval_accuracy_large.py
View file @
158e8f1e
...
...
@@ -5,8 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_
URL_FOR_ACCURACY_TEST
,
DEFAULT_URL_FOR_
UNIT_
TEST
,
DEFAULT_
TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
ACCURACY_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--log-level-http"
,
"warning"
],
)
...
...
test/srt/test_eval_accuracy_large_chunked_prefill.py
View file @
158e8f1e
...
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_ACCURACY_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
ACCURACY_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--log-level-http"
,
"warning"
,
"--chunked-prefill-size"
,
"256"
],
)
...
...
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
View file @
158e8f1e
...
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_ACCURACY_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
ACCURACY_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--log-level-http"
,
"warning"
,
...
...
test/srt/test_eval_accuracy_mini.py
View file @
158e8f1e
...
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_UNIT_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@
classmethod
def
tearDownClass
(
cls
):
...
...
test/srt/test_large_max_new_tokens.py
View file @
158e8f1e
...
...
@@ -10,7 +10,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
(
"--max-total-token"
,
"1024"
),
env
=
{
"SGLANG_CLIP_MAX_NEW_TOKENS"
:
"256"
,
**
os
.
environ
},
...
...
test/srt/test_moe_serving_throughput.py
View file @
158e8f1e
...
...
@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MOE_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_MOE_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -25,9 +26,12 @@ class TestServingThroughput(unittest.TestCase):
other_args
.
append
(
"--enable-p2p-check"
)
model
=
DEFAULT_MOE_MODEL_NAME_FOR_TEST
base_url
=
DEFAULT_URL_FOR_
MOE_
TEST
base_url
=
DEFAULT_URL_FOR_TEST
process
=
popen_launch_server
(
model
,
base_url
,
timeout
=
300
,
other_args
=
other_args
model
,
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
)
# Run benchmark
...
...
@@ -72,8 +76,8 @@ class TestServingThroughput(unittest.TestCase):
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
assert
res
[
"output_throughput"
]
>
9
10
# A100 (PCIE)
: 950, H100 (SMX): 1800
assert
res
[
"output_throughput"
]
>
1
75
0
def
test_default_without_radix_cache
(
self
):
res
=
self
.
run_test
(
...
...
@@ -83,19 +87,8 @@ class TestServingThroughput(unittest.TestCase):
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE) performance
assert
res
[
"output_throughput"
]
>
910
def
test_default_without_chunked_prefill
(
self
):
res
=
self
.
run_test
(
disable_radix_cache
=
ServerArgs
.
disable_radix_cache
,
disable_flashinfer
=
ServerArgs
.
disable_flashinfer
,
chunked_prefill_size
=-
1
,
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE) performance
print
(
res
[
"output_throughput"
])
# A100 (PCIE): 950, H100 (SMX): 1900
assert
res
[
"output_throughput"
]
>
1850
def
test_all_cases
(
self
):
for
disable_radix_cache
in
[
False
,
True
]:
...
...
test/srt/test_openai_server.py
View file @
158e8f1e
...
...
@@ -8,7 +8,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
api_key
=
cls
.
api_key
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
cls
.
base_url
+=
"/v1"
cls
.
tokenizer
=
get_tokenizer
(
DEFAULT_MODEL_NAME_FOR_TEST
)
...
...
test/srt/test_serving_throughput.py
View file @
158e8f1e
...
...
@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_E2E_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -23,9 +24,12 @@ class TestServingThroughput(unittest.TestCase):
other_args
.
extend
([
"--chunked-prefill-size"
,
str
(
chunked_prefill_size
)])
model
=
DEFAULT_MODEL_NAME_FOR_TEST
base_url
=
DEFAULT_URL_FOR_
E2E_
TEST
base_url
=
DEFAULT_URL_FOR_TEST
process
=
popen_launch_server
(
model
,
base_url
,
timeout
=
300
,
other_args
=
other_args
model
,
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
)
# Run benchmark
...
...
@@ -70,8 +74,8 @@ class TestServingThroughput(unittest.TestCase):
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
assert
res
[
"output_throughput"
]
>
14
00
# A100 (PCIE)
: 1450, H100 (SMX): 2550
assert
res
[
"output_throughput"
]
>
25
00
def
test_default_without_radix_cache
(
self
):
res
=
self
.
run_test
(
...
...
@@ -81,8 +85,8 @@ class TestServingThroughput(unittest.TestCase):
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
assert
res
[
"output_throughput"
]
>
145
0
# A100 (PCIE)
: 1500, H100 (SMX): 2850
assert
res
[
"output_throughput"
]
>
280
0
def
test_default_without_chunked_prefill
(
self
):
res
=
self
.
run_test
(
...
...
@@ -92,8 +96,8 @@ class TestServingThroughput(unittest.TestCase):
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
assert
res
[
"output_throughput"
]
>
14
00
# A100 (PCIE)
: 1450, H100 (SMX): 2550
assert
res
[
"output_throughput"
]
>
25
00
def
test_all_cases
(
self
):
for
disable_radix_cache
in
[
False
,
True
]:
...
...
test/srt/test_skip_tokenizer_init.py
View file @
158e8f1e
...
...
@@ -6,7 +6,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--skip-tokenizer-init"
]
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--skip-tokenizer-init"
],
)
@
classmethod
...
...
test/srt/test_srt_endpoint.py
View file @
158e8f1e
...
...
@@ -6,7 +6,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_UNIT_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@
classmethod
def
tearDownClass
(
cls
):
...
...
test/srt/test_torch_compile.py
View file @
158e8f1e
...
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--enable-torch-compile"
]
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--enable-torch-compile"
],
)
@
classmethod
...
...
test/srt/test_triton_attn_backend.py
View file @
158e8f1e
...
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--disable-flashinfer"
]
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--disable-flashinfer"
],
)
@
classmethod
...
...
test/srt/test_update_weights.py
View file @
158e8f1e
...
...
@@ -6,7 +6,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
...
...
@@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_UNIT_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@
classmethod
def
tearDownClass
(
cls
):
...
...
test/srt/test_vision_openai_server.py
View file @
158e8f1e
...
...
@@ -11,19 +11,23 @@ from decord import VideoReader, cpu
from
PIL
import
Image
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
DEFAULT_URL_FOR_UNIT_TEST
,
popen_launch_server
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
class
TestOpenAIVisionServer
(
unittest
.
TestCase
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--chat-template"
,
...
...
@@ -67,7 +71,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
text
=
response
.
choices
[
0
].
message
.
content
assert
isinstance
(
text
,
str
)
assert
"
logo
"
in
text
,
text
assert
"
man"
in
text
or
"cab
"
in
text
,
text
assert
response
.
id
assert
response
.
created
assert
response
.
usage
.
prompt_tokens
>
0
...
...
@@ -86,18 +90,19 @@ class TestOpenAIVisionServer(unittest.TestCase):
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
assets/logo
.png"
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
test/lang/example_image
.png"
},
},
{
"type"
:
"image_url"
,
"image_url"
:
{
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
test/lang/example_image
.png"
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
assets/logo
.png"
},
},
{
"type"
:
"text"
,
"text"
:
"I have shown you two images. Please describe the two images to me."
,
"text"
:
"I have two very different images. They are not related at all. "
"Please describe the first image in one sentence, and then describe the second image in another sentence."
,
},
],
},
...
...
@@ -108,8 +113,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
text
=
response
.
choices
[
0
].
message
.
content
assert
isinstance
(
text
,
str
)
print
(
text
)
assert
"man"
in
text
or
"cab"
in
text
,
text
assert
"logo"
in
text
,
text
#
assert "logo" in text, text
assert
response
.
id
assert
response
.
created
assert
response
.
usage
.
prompt_tokens
>
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment