Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
158e8f1e
Unverified
Commit
158e8f1e
authored
Aug 25, 2024
by
Mingyi
Committed by
GitHub
Aug 25, 2024
Browse files
improve the threshold and ports in tests (#1215)
parent
d3efcb39
Changes
18
Show whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
122 additions
and
86 deletions
+122
-86
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+4
-8
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
.../sampling/penaltylib/test_srt_endpoint_with_penalizers.py
+4
-3
test/srt/test_chunked_prefill.py
test/srt/test_chunked_prefill.py
+4
-3
test/srt/test_embedding_openai_server.py
test/srt/test_embedding_openai_server.py
+10
-3
test/srt/test_eval_accuracy_large.py
test/srt/test_eval_accuracy_large.py
+4
-4
test/srt/test_eval_accuracy_large_chunked_prefill.py
test/srt/test_eval_accuracy_large_chunked_prefill.py
+4
-3
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
+4
-3
test/srt/test_eval_accuracy_mini.py
test/srt/test_eval_accuracy_mini.py
+6
-3
test/srt/test_large_max_new_tokens.py
test/srt/test_large_max_new_tokens.py
+4
-3
test/srt/test_moe_serving_throughput.py
test/srt/test_moe_serving_throughput.py
+11
-18
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+7
-3
test/srt/test_serving_throughput.py
test/srt/test_serving_throughput.py
+13
-9
test/srt/test_skip_tokenizer_init.py
test/srt/test_skip_tokenizer_init.py
+7
-3
test/srt/test_srt_endpoint.py
test/srt/test_srt_endpoint.py
+6
-3
test/srt/test_torch_compile.py
test/srt/test_torch_compile.py
+7
-3
test/srt/test_triton_attn_backend.py
test/srt/test_triton_attn_backend.py
+7
-3
test/srt/test_update_weights.py
test/srt/test_update_weights.py
+6
-3
test/srt/test_vision_openai_server.py
test/srt/test_vision_openai_server.py
+14
-8
No files found.
python/sglang/test/test_utils.py
View file @
158e8f1e
...
@@ -23,18 +23,14 @@ from sglang.utils import get_exception_traceback
...
@@ -23,18 +23,14 @@ from sglang.utils import get_exception_traceback
DEFAULT_MODEL_NAME_FOR_TEST
=
"meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MODEL_NAME_FOR_TEST
=
"meta-llama/Meta-Llama-3.1-8B-Instruct"
DEFAULT_MOE_MODEL_NAME_FOR_TEST
=
"mistralai/Mixtral-8x7B-Instruct-v0.1"
DEFAULT_MOE_MODEL_NAME_FOR_TEST
=
"mistralai/Mixtral-8x7B-Instruct-v0.1"
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
=
600
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
5157
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
5157
DEFAULT_URL_FOR_MOE_TEST
=
"http://127.0.0.1:6157"
DEFAULT_URL_FOR_TEST
=
"http://127.0.0.1:6157"
DEFAULT_URL_FOR_ACCURACY_TEST
=
"http://127.0.0.1:7157"
DEFAULT_URL_FOR_UNIT_TEST
=
"http://127.0.0.1:8157"
DEFAULT_URL_FOR_E2E_TEST
=
"http://127.0.0.1:9157"
else
:
else
:
DEFAULT_URL_FOR_MOE_TEST
=
"http://127.0.0.1:1157"
DEFAULT_PORT_FOR_SRT_TEST_RUNNER
=
1157
DEFAULT_URL_FOR_ACCURACY_TEST
=
"http://127.0.0.1:1257"
DEFAULT_URL_FOR_TEST
=
"http://127.0.0.1:2157"
DEFAULT_URL_FOR_UNIT_TEST
=
"http://127.0.0.1:1357"
DEFAULT_URL_FOR_E2E_TEST
=
"http://127.0.0.1:1457"
def
call_generate_lightllm
(
prompt
,
temperature
,
max_tokens
,
stop
=
None
,
url
=
None
):
def
call_generate_lightllm
(
prompt
,
temperature
,
max_tokens
,
stop
=
None
,
url
=
None
):
...
...
test/srt/sampling/penaltylib/test_srt_endpoint_with_penalizers.py
View file @
158e8f1e
...
@@ -7,7 +7,8 @@ import requests
...
@@ -7,7 +7,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase):
...
@@ -17,11 +18,11 @@ class TestBatchPenalizerE2E(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
(
other_args
=
(
"--random-seed"
,
"--random-seed"
,
"0"
,
"0"
,
...
...
test/srt/test_chunked_prefill.py
View file @
158e8f1e
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -20,11 +21,11 @@ class TestChunkedPrefill(unittest.TestCase):
...
@@ -20,11 +21,11 @@ class TestChunkedPrefill(unittest.TestCase):
other_args
+=
[
"--enable-mixed-chunk"
]
other_args
+=
[
"--enable-mixed-chunk"
]
model
=
DEFAULT_MODEL_NAME_FOR_TEST
model
=
DEFAULT_MODEL_NAME_FOR_TEST
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
base_url
=
DEFAULT_URL_FOR_TEST
process
=
popen_launch_server
(
process
=
popen_launch_server
(
model
,
model
,
base_url
,
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
other_args
=
other_args
,
)
)
...
...
test/srt/test_embedding_openai_server.py
View file @
158e8f1e
...
@@ -4,17 +4,24 @@ import openai
...
@@ -4,17 +4,24 @@ import openai
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.hf_transformers_utils
import
get_tokenizer
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
DEFAULT_URL_FOR_UNIT_TEST
,
popen_launch_server
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
class
TestOpenAIServer
(
unittest
.
TestCase
):
class
TestOpenAIServer
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
"intfloat/e5-mistral-7b-instruct"
cls
.
model
=
"intfloat/e5-mistral-7b-instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
api_key
=
cls
.
api_key
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
)
cls
.
base_url
+=
"/v1"
cls
.
base_url
+=
"/v1"
cls
.
tokenizer
=
get_tokenizer
(
cls
.
model
)
cls
.
tokenizer
=
get_tokenizer
(
cls
.
model
)
...
...
test/srt/test_eval_accuracy_large.py
View file @
158e8f1e
...
@@ -5,8 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,8 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_
URL_FOR_ACCURACY_TEST
,
DEFAULT_
TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_
UNIT_
TEST
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
...
@@ -15,11 +15,11 @@ class TestEvalAccuracyLarge(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
ACCURACY_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--log-level-http"
,
"warning"
],
other_args
=
[
"--log-level-http"
,
"warning"
],
)
)
...
...
test/srt/test_eval_accuracy_large_chunked_prefill.py
View file @
158e8f1e
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_ACCURACY_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
...
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
ACCURACY_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--log-level-http"
,
"warning"
,
"--chunked-prefill-size"
,
"256"
],
other_args
=
[
"--log-level-http"
,
"warning"
,
"--chunked-prefill-size"
,
"256"
],
)
)
...
...
test/srt/test_eval_accuracy_large_mixed_chunked_prefill.py
View file @
158e8f1e
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_ACCURACY_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
...
@@ -14,11 +15,11 @@ class TestEvalAccuracyLargeChunkedPrefill(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
ACCURACY_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
other_args
=
[
"--log-level-http"
,
"--log-level-http"
,
"warning"
,
"warning"
,
...
...
test/srt/test_eval_accuracy_mini.py
View file @
158e8f1e
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase):
...
@@ -14,8 +15,10 @@ class TestEvalAccuracyMini(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_UNIT_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@
classmethod
@
classmethod
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
...
...
test/srt/test_large_max_new_tokens.py
View file @
158e8f1e
...
@@ -10,7 +10,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
...
@@ -10,7 +10,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase):
...
@@ -19,12 +20,12 @@ class TestOpenAIServer(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
api_key
=
cls
.
api_key
,
other_args
=
(
"--max-total-token"
,
"1024"
),
other_args
=
(
"--max-total-token"
,
"1024"
),
env
=
{
"SGLANG_CLIP_MAX_NEW_TOKENS"
:
"256"
,
**
os
.
environ
},
env
=
{
"SGLANG_CLIP_MAX_NEW_TOKENS"
:
"256"
,
**
os
.
environ
},
...
...
test/srt/test_moe_serving_throughput.py
View file @
158e8f1e
...
@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
...
@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MOE_MODEL_NAME_FOR_TEST
,
DEFAULT_MOE_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_MOE_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -25,9 +26,12 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -25,9 +26,12 @@ class TestServingThroughput(unittest.TestCase):
other_args
.
append
(
"--enable-p2p-check"
)
other_args
.
append
(
"--enable-p2p-check"
)
model
=
DEFAULT_MOE_MODEL_NAME_FOR_TEST
model
=
DEFAULT_MOE_MODEL_NAME_FOR_TEST
base_url
=
DEFAULT_URL_FOR_
MOE_
TEST
base_url
=
DEFAULT_URL_FOR_TEST
process
=
popen_launch_server
(
process
=
popen_launch_server
(
model
,
base_url
,
timeout
=
300
,
other_args
=
other_args
model
,
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
)
)
# Run benchmark
# Run benchmark
...
@@ -72,8 +76,8 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -72,8 +76,8 @@ class TestServingThroughput(unittest.TestCase):
)
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
# A100 (PCIE)
: 950, H100 (SMX): 1800
assert
res
[
"output_throughput"
]
>
9
10
assert
res
[
"output_throughput"
]
>
1
75
0
def
test_default_without_radix_cache
(
self
):
def
test_default_without_radix_cache
(
self
):
res
=
self
.
run_test
(
res
=
self
.
run_test
(
...
@@ -83,19 +87,8 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -83,19 +87,8 @@ class TestServingThroughput(unittest.TestCase):
)
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE) performance
# A100 (PCIE): 950, H100 (SMX): 1900
assert
res
[
"output_throughput"
]
>
910
assert
res
[
"output_throughput"
]
>
1850
def
test_default_without_chunked_prefill
(
self
):
res
=
self
.
run_test
(
disable_radix_cache
=
ServerArgs
.
disable_radix_cache
,
disable_flashinfer
=
ServerArgs
.
disable_flashinfer
,
chunked_prefill_size
=-
1
,
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE) performance
print
(
res
[
"output_throughput"
])
def
test_all_cases
(
self
):
def
test_all_cases
(
self
):
for
disable_radix_cache
in
[
False
,
True
]:
for
disable_radix_cache
in
[
False
,
True
]:
...
...
test/srt/test_openai_server.py
View file @
158e8f1e
...
@@ -8,7 +8,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
...
@@ -8,7 +8,8 @@ from sglang.srt.hf_transformers_utils import get_tokenizer
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase):
...
@@ -17,10 +18,13 @@ class TestOpenAIServer(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
api_key
=
cls
.
api_key
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
)
)
cls
.
base_url
+=
"/v1"
cls
.
base_url
+=
"/v1"
cls
.
tokenizer
=
get_tokenizer
(
DEFAULT_MODEL_NAME_FOR_TEST
)
cls
.
tokenizer
=
get_tokenizer
(
DEFAULT_MODEL_NAME_FOR_TEST
)
...
...
test/srt/test_serving_throughput.py
View file @
158e8f1e
...
@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
...
@@ -7,7 +7,8 @@ from sglang.srt.server_args import ServerArgs
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_E2E_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -23,9 +24,12 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -23,9 +24,12 @@ class TestServingThroughput(unittest.TestCase):
other_args
.
extend
([
"--chunked-prefill-size"
,
str
(
chunked_prefill_size
)])
other_args
.
extend
([
"--chunked-prefill-size"
,
str
(
chunked_prefill_size
)])
model
=
DEFAULT_MODEL_NAME_FOR_TEST
model
=
DEFAULT_MODEL_NAME_FOR_TEST
base_url
=
DEFAULT_URL_FOR_
E2E_
TEST
base_url
=
DEFAULT_URL_FOR_TEST
process
=
popen_launch_server
(
process
=
popen_launch_server
(
model
,
base_url
,
timeout
=
300
,
other_args
=
other_args
model
,
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
other_args
,
)
)
# Run benchmark
# Run benchmark
...
@@ -70,8 +74,8 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -70,8 +74,8 @@ class TestServingThroughput(unittest.TestCase):
)
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
# A100 (PCIE)
: 1450, H100 (SMX): 2550
assert
res
[
"output_throughput"
]
>
14
00
assert
res
[
"output_throughput"
]
>
25
00
def
test_default_without_radix_cache
(
self
):
def
test_default_without_radix_cache
(
self
):
res
=
self
.
run_test
(
res
=
self
.
run_test
(
...
@@ -81,8 +85,8 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -81,8 +85,8 @@ class TestServingThroughput(unittest.TestCase):
)
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
# A100 (PCIE)
: 1500, H100 (SMX): 2850
assert
res
[
"output_throughput"
]
>
145
0
assert
res
[
"output_throughput"
]
>
280
0
def
test_default_without_chunked_prefill
(
self
):
def
test_default_without_chunked_prefill
(
self
):
res
=
self
.
run_test
(
res
=
self
.
run_test
(
...
@@ -92,8 +96,8 @@ class TestServingThroughput(unittest.TestCase):
...
@@ -92,8 +96,8 @@ class TestServingThroughput(unittest.TestCase):
)
)
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
if
os
.
getenv
(
"SGLANG_IS_IN_CI"
,
"false"
)
==
"true"
:
# A100 (PCIE)
performance
# A100 (PCIE)
: 1450, H100 (SMX): 2550
assert
res
[
"output_throughput"
]
>
14
00
assert
res
[
"output_throughput"
]
>
25
00
def
test_all_cases
(
self
):
def
test_all_cases
(
self
):
for
disable_radix_cache
in
[
False
,
True
]:
for
disable_radix_cache
in
[
False
,
True
]:
...
...
test/srt/test_skip_tokenizer_init.py
View file @
158e8f1e
...
@@ -6,7 +6,8 @@ import requests
...
@@ -6,7 +6,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase):
...
@@ -15,9 +16,12 @@ class TestSkipTokenizerInit(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--skip-tokenizer-init"
]
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--skip-tokenizer-init"
],
)
)
@
classmethod
@
classmethod
...
...
test/srt/test_srt_endpoint.py
View file @
158e8f1e
...
@@ -6,7 +6,8 @@ import requests
...
@@ -6,7 +6,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase):
...
@@ -15,8 +16,10 @@ class TestSRTEndpoint(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_UNIT_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@
classmethod
@
classmethod
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
...
...
test/srt/test_torch_compile.py
View file @
158e8f1e
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase):
...
@@ -14,9 +15,12 @@ class TestTorchCompile(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--enable-torch-compile"
]
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--enable-torch-compile"
],
)
)
@
classmethod
@
classmethod
...
...
test/srt/test_triton_attn_backend.py
View file @
158e8f1e
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
...
@@ -5,7 +5,8 @@ from sglang.srt.utils import kill_child_process
from
sglang.test.run_eval
import
run_eval
from
sglang.test.run_eval
import
run_eval
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase):
...
@@ -14,9 +15,12 @@ class TestTritonAttnBackend(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
,
other_args
=
[
"--disable-flashinfer"
]
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--disable-flashinfer"
],
)
)
@
classmethod
@
classmethod
...
...
test/srt/test_update_weights.py
View file @
158e8f1e
...
@@ -6,7 +6,8 @@ import requests
...
@@ -6,7 +6,8 @@ import requests
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
(
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_UNIT_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
popen_launch_server
,
)
)
...
@@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase):
...
@@ -15,8 +16,10 @@ class TestReplaceWeights(unittest.TestCase):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
model
=
DEFAULT_MODEL_NAME_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_UNIT_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
300
)
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
)
@
classmethod
@
classmethod
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
...
...
test/srt/test_vision_openai_server.py
View file @
158e8f1e
...
@@ -11,19 +11,23 @@ from decord import VideoReader, cpu
...
@@ -11,19 +11,23 @@ from decord import VideoReader, cpu
from
PIL
import
Image
from
PIL
import
Image
from
sglang.srt.utils
import
kill_child_process
from
sglang.srt.utils
import
kill_child_process
from
sglang.test.test_utils
import
DEFAULT_URL_FOR_UNIT_TEST
,
popen_launch_server
from
sglang.test.test_utils
import
(
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
DEFAULT_URL_FOR_TEST
,
popen_launch_server
,
)
class
TestOpenAIVisionServer
(
unittest
.
TestCase
):
class
TestOpenAIVisionServer
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls
.
model
=
"lmms-lab/llava-onevision-qwen2-0.5b-ov"
cls
.
base_url
=
DEFAULT_URL_FOR_
UNIT_
TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
model
,
cls
.
base_url
,
cls
.
base_url
,
timeout
=
300
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
api_key
=
cls
.
api_key
,
other_args
=
[
other_args
=
[
"--chat-template"
,
"--chat-template"
,
...
@@ -67,7 +71,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
...
@@ -67,7 +71,7 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
text
=
response
.
choices
[
0
].
message
.
content
text
=
response
.
choices
[
0
].
message
.
content
assert
isinstance
(
text
,
str
)
assert
isinstance
(
text
,
str
)
assert
"
logo
"
in
text
,
text
assert
"
man"
in
text
or
"cab
"
in
text
,
text
assert
response
.
id
assert
response
.
id
assert
response
.
created
assert
response
.
created
assert
response
.
usage
.
prompt_tokens
>
0
assert
response
.
usage
.
prompt_tokens
>
0
...
@@ -86,18 +90,19 @@ class TestOpenAIVisionServer(unittest.TestCase):
...
@@ -86,18 +90,19 @@ class TestOpenAIVisionServer(unittest.TestCase):
{
{
"type"
:
"image_url"
,
"type"
:
"image_url"
,
"image_url"
:
{
"image_url"
:
{
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
assets/logo
.png"
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
test/lang/example_image
.png"
},
},
},
},
{
{
"type"
:
"image_url"
,
"type"
:
"image_url"
,
"image_url"
:
{
"image_url"
:
{
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
test/lang/example_image
.png"
"url"
:
"https://raw.githubusercontent.com/sgl-project/sglang/main/
assets/logo
.png"
},
},
},
},
{
{
"type"
:
"text"
,
"type"
:
"text"
,
"text"
:
"I have shown you two images. Please describe the two images to me."
,
"text"
:
"I have two very different images. They are not related at all. "
"Please describe the first image in one sentence, and then describe the second image in another sentence."
,
},
},
],
],
},
},
...
@@ -108,8 +113,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
...
@@ -108,8 +113,9 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
assert
response
.
choices
[
0
].
message
.
role
==
"assistant"
text
=
response
.
choices
[
0
].
message
.
content
text
=
response
.
choices
[
0
].
message
.
content
assert
isinstance
(
text
,
str
)
assert
isinstance
(
text
,
str
)
print
(
text
)
assert
"man"
in
text
or
"cab"
in
text
,
text
assert
"man"
in
text
or
"cab"
in
text
,
text
assert
"logo"
in
text
,
text
#
assert "logo" in text, text
assert
response
.
id
assert
response
.
id
assert
response
.
created
assert
response
.
created
assert
response
.
usage
.
prompt_tokens
>
0
assert
response
.
usage
.
prompt_tokens
>
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment