Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
254fd130
Unverified
Commit
254fd130
authored
Nov 25, 2024
by
Lianmin Zheng
Committed by
GitHub
Nov 25, 2024
Browse files
[CI] Split test cases in CI for better load balancing (#2180)
parent
538fa0ae
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
37 additions
and
23 deletions
+37
-23
scripts/ci_install_dependency.sh
scripts/ci_install_dependency.sh
+4
-0
test/srt/run_suite.py
test/srt/run_suite.py
+2
-1
test/srt/test_chunked_prefill.py
test/srt/test_chunked_prefill.py
+1
-21
test/srt/test_no_chunked_prefill.py
test/srt/test_no_chunked_prefill.py
+29
-0
test/srt/test_no_overlap_scheduler.py
test/srt/test_no_overlap_scheduler.py
+0
-0
test/srt/test_srt_endpoint.py
test/srt/test_srt_endpoint.py
+1
-1
No files found.
scripts/ci_install_dependency.sh
View file @
254fd130
...
...
@@ -6,3 +6,7 @@ pip install --upgrade pip
pip
install
-e
"python[all]"
pip
install
transformers
==
4.45.2 sentence_transformers accelerate peft
pip
install
flashinfer
-i
https://flashinfer.ai/whl/cu121/torch2.4/
--force-reinstall
# for compiling eagle kernels
pip
install
cutex
# for compiling xgrammar kernels
pip
install
cuda-python nvidia-cuda-nvrtc-cu12
test/srt/run_suite.py
View file @
254fd130
...
...
@@ -17,7 +17,8 @@ suites = {
"test_json_constrained.py"
,
"test_large_max_new_tokens.py"
,
"test_metrics.py"
,
"test_non_overlap_scheduler.py"
,
"test_no_chunked_prefill.py"
,
"test_no_overlap_scheduler.py"
,
"test_openai_server.py"
,
"test_pytorch_sampling_backend.py"
,
"test_radix_attention.py"
,
...
...
test/srt/test_chunked_prefill.py
View file @
254fd130
...
...
@@ -4,12 +4,7 @@ python3 -m unittest test_chunked_prefill.TestChunkedPrefill.test_mixed_chunked_p
import
unittest
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
run_bench_serving
,
run_mmlu_test
,
run_mulit_request_test
,
)
from
sglang.test.test_utils
import
run_mmlu_test
,
run_mulit_request_test
class
TestChunkedPrefill
(
unittest
.
TestCase
):
...
...
@@ -25,21 +20,6 @@ class TestChunkedPrefill(unittest.TestCase):
def
test_mixed_chunked_prefill_without_radix_cache
(
self
):
run_mmlu_test
(
disable_radix_cache
=
True
,
enable_mixed_chunk
=
True
)
def
test_no_chunked_prefill
(
self
):
run_mmlu_test
(
disable_radix_cache
=
False
,
enable_mixed_chunk
=
False
,
chunked_prefill_size
=-
1
)
def
test_no_chunked_prefill_without_radix_cache
(
self
):
res
=
run_bench_serving
(
model
=
DEFAULT_MODEL_NAME_FOR_TEST
,
num_prompts
=
10
,
request_rate
=
float
(
"inf"
),
other_server_args
=
[
"--disable-radix-cache"
,
"--chunked-prefill-size"
,
"-1"
],
)
assert
res
[
"completed"
]
==
10
def
test_mixed_chunked_prefill_multi_requests
(
self
):
run_mulit_request_test
(
enable_mixed_chunk
=
True
,
...
...
test/srt/test_no_chunked_prefill.py
0 → 100644
View file @
254fd130
import
unittest
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
run_bench_serving
,
run_mmlu_test
,
)
class
TestNoChunkedPrefill
(
unittest
.
TestCase
):
def
test_no_chunked_prefill
(
self
):
run_mmlu_test
(
disable_radix_cache
=
False
,
enable_mixed_chunk
=
False
,
chunked_prefill_size
=-
1
)
def
test_no_chunked_prefill_without_radix_cache
(
self
):
res
=
run_bench_serving
(
model
=
DEFAULT_MODEL_NAME_FOR_TEST
,
num_prompts
=
10
,
request_rate
=
float
(
"inf"
),
other_server_args
=
[
"--disable-radix-cache"
,
"--chunked-prefill-size"
,
"-1"
],
)
assert
res
[
"completed"
]
==
10
if
__name__
==
"__main__"
:
unittest
.
main
()
test/srt/test_no
n
_overlap_scheduler.py
→
test/srt/test_no_overlap_scheduler.py
View file @
254fd130
File moved
test/srt/test_srt_endpoint.py
View file @
254fd130
...
...
@@ -211,7 +211,7 @@ class TestSRTEndpoint(unittest.TestCase):
diff
=
np
.
abs
(
output_logprobs
-
output_logprobs_score
)
max_diff
=
np
.
max
(
diff
)
self
.
assertLess
(
max_diff
,
0.2
)
self
.
assertLess
(
max_diff
,
0.2
5
)
def
test_get_server_info
(
self
):
response
=
requests
.
get
(
self
.
base_url
+
"/get_server_info"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment