Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
73dfd2df
Unverified
Commit
73dfd2df
authored
Oct 30, 2025
by
Liangsheng Yin
Committed by
GitHub
Oct 30, 2025
Browse files
[Test] Enhance radix cache test for spec cases (#12394)
parent
df5192cf
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
61 additions
and
56 deletions
+61
-56
python/sglang/test/kits/matched_stop_kit.py
python/sglang/test/kits/matched_stop_kit.py
+0
-0
python/sglang/test/kits/radix_cache_server_kit.py
python/sglang/test/kits/radix_cache_server_kit.py
+50
-0
test/srt/openai_server/validation/test_matched_stop.py
test/srt/openai_server/validation/test_matched_stop.py
+1
-1
test/srt/run_suite.py
test/srt/run_suite.py
+1
-1
test/srt/test_eagle_infer_beta.py
test/srt/test_eagle_infer_beta.py
+5
-1
test/srt/test_radix_attention.py
test/srt/test_radix_attention.py
+4
-53
No files found.
python/sglang/test/kit
_
matched_stop.py
→
python/sglang/test/kit
s/
matched_stop
_kit
.py
View file @
73dfd2df
File moved
python/sglang/test/kits/radix_cache_server_kit.py
0 → 100644
View file @
73dfd2df
import
random
import
requests
def gen_radix_tree(num_nodes=400, chunk_len=256, seed=None):
    """Build a randomly shaped prefix tree of generation requests.

    Each request is a dict with ``"input_ids"`` (a list of token ids) and
    ``"decode_len"`` (how many tokens to generate). Every child reuses its
    parent's ``input_ids`` and appends a run of one repeated token, so the
    resulting requests share prefixes of varying length.

    Args:
        num_nodes: Number of child requests to create. The root request is
            always present, so the result holds ``num_nodes + 1`` entries.
        chunk_len: Inclusive upper bound for both the number of tokens a
            child appends to its parent's prompt and its ``decode_len``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used, making the tree reproducible without touching the
            global ``random`` state. When ``None`` (the default) the
            module-level generator is used, exactly as before.

    Returns:
        The list of request dicts, in shuffled order.
    """
    # The ``random`` module and ``random.Random`` expose the same
    # randint/choice/shuffle API, so either can serve as the source.
    rng = random if seed is None else random.Random(seed)

    def make_child(parent):
        # Draw order (unique_len, decode_len, token_id) is fixed so that a
        # given RNG state always produces the same tree.
        unique_len = rng.randint(0, chunk_len)
        decode_len = rng.randint(0, chunk_len)
        # NOTE(review): randint is inclusive, so 32000 itself can be drawn;
        # confirm that id is valid for every model this kit is run against.
        token_id = rng.randint(0, 32000)
        return {
            "input_ids": parent["input_ids"] + [token_id] * unique_len,
            "decode_len": decode_len,
        }

    num_single = num_nodes // 2
    num_grouped = num_nodes - num_single

    nodes = [{"input_ids": [37] * 117, "decode_len": 217}]

    # Phase 1: attach children one at a time, each to a random existing node.
    for _ in range(num_single):
        parent = rng.choice(nodes)
        nodes.append(make_child(parent))

    # Phase 2: attach groups of 1-10 siblings to a single random parent.
    while num_grouped > 0:
        num_branch = rng.randint(1, min(num_grouped, 10))
        parent = rng.choice(nodes)
        for _ in range(num_branch):
            nodes.append(make_child(parent))
        num_grouped -= num_branch

    rng.shuffle(nodes)
    return nodes
def run_radix_attention_test(base_url: str, nodes=None):
    """Send one batched ``/generate`` request built from a prefix tree.

    All requests are posted in a single call, with ``temperature`` fixed
    at 0 and ``max_new_tokens`` taken from each node's ``"decode_len"``.

    Args:
        base_url: Base URL of the server; ``"/generate"`` is appended to it.
        nodes: Optional list of request dicts, each with ``"input_ids"`` and
            ``"decode_len"``. Defaults to a fresh ``gen_radix_tree()``.

    Raises:
        AssertionError: If the response status code is not 200.
    """
    if nodes is None:
        nodes = gen_radix_tree()

    data = {
        "input_ids": [node["input_ids"] for node in nodes],
        "sampling_params": [
            {"max_new_tokens": node["decode_len"], "temperature": 0}
            for node in nodes
        ],
    }

    res = requests.post(base_url + "/generate", json=data)
    # Include status and a slice of the body so a failing run is diagnosable
    # from the test log alone.
    assert (
        res.status_code == 200
    ), f"/generate returned HTTP {res.status_code}: {res.text[:500]}"
test/srt/openai_server/validation/test_matched_stop.py
View file @
73dfd2df
...
...
@@ -2,7 +2,7 @@ import unittest
from
sglang.srt.sampling.sampling_params
import
MAX_LEN
,
get_max_seq_length
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.kit
_
matched_stop
import
MatchedStopMixin
from
sglang.test.kit
s.
matched_stop
_kit
import
MatchedStopMixin
from
sglang.test.test_utils
import
(
DEFAULT_MODEL_NAME_FOR_TEST
,
DEFAULT_URL_FOR_TEST
,
...
...
test/srt/run_suite.py
View file @
73dfd2df
...
...
@@ -78,7 +78,7 @@ suites = {
TestFile
(
"test_deterministic.py"
,
320
),
TestFile
(
"test_eagle_infer_a.py"
,
370
),
TestFile
(
"test_eagle_infer_b.py"
,
700
),
TestFile
(
"test_eagle_infer_beta.py"
,
30
0
),
TestFile
(
"test_eagle_infer_beta.py"
,
9
0
),
TestFile
(
"test_ebnf_constrained.py"
,
108
),
TestFile
(
"test_eval_fp8_accuracy.py"
,
303
),
TestFile
(
"test_fa3.py"
,
376
),
...
...
test/srt/test_eagle_infer_beta.py
View file @
73dfd2df
...
...
@@ -4,7 +4,8 @@ from types import SimpleNamespace
from
sglang.srt.environ
import
envs
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.few_shot_gsm8k
import
run_eval
from
sglang.test.kit_matched_stop
import
MatchedStopMixin
from
sglang.test.kits.matched_stop_kit
import
MatchedStopMixin
from
sglang.test.kits.radix_cache_server_kit
import
run_radix_attention_test
from
sglang.test.test_utils
import
(
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST
,
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
,
...
...
@@ -65,6 +66,9 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
def
tearDownClass
(
cls
):
kill_process_tree
(
cls
.
process
.
pid
)
def
test_radix_attention
(
self
):
run_radix_attention_test
(
self
.
base_url
)
def
test_gsm8k
(
self
):
args
=
SimpleNamespace
(
num_shots
=
5
,
...
...
test/srt/test_radix_attention.py
View file @
73dfd2df
import
os
import
random
import
unittest
import
request
s
from
sglang.srt.environ
import
env
s
from
sglang.test.kits.radix_cache_server_kit
import
run_radix_attention_test
from
sglang.test.test_utils
import
(
DEFAULT_SMALL_MODEL_NAME_FOR_TEST
,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
...
...
@@ -15,52 +13,6 @@ from sglang.test.test_utils import (
)
def gen_radix_tree(num_nodes=400, chunk_len=256, seed=None):
    """Build a randomly shaped prefix tree of generation requests.

    Each request is a dict with ``"input_ids"`` (a list of token ids) and
    ``"decode_len"`` (how many tokens to generate). Every child reuses its
    parent's ``input_ids`` and appends a run of one repeated token, so the
    resulting requests share prefixes of varying length.

    Args:
        num_nodes: Number of child requests to create. The root request is
            always present, so the result holds ``num_nodes + 1`` entries.
        chunk_len: Inclusive upper bound for both the number of tokens a
            child appends to its parent's prompt and its ``decode_len``.
        seed: Optional seed. When given, a private ``random.Random(seed)``
            is used, making the tree reproducible without touching the
            global ``random`` state. When ``None`` (the default) the
            module-level generator is used, exactly as before.

    Returns:
        The list of request dicts, in shuffled order.
    """
    # The ``random`` module and ``random.Random`` expose the same
    # randint/choice/shuffle API, so either can serve as the source.
    rng = random if seed is None else random.Random(seed)

    def make_child(parent):
        # Draw order (unique_len, decode_len, token_id) is fixed so that a
        # given RNG state always produces the same tree.
        unique_len = rng.randint(0, chunk_len)
        decode_len = rng.randint(0, chunk_len)
        # NOTE(review): randint is inclusive, so 32000 itself can be drawn;
        # confirm that id is valid for every model this test is run against.
        token_id = rng.randint(0, 32000)
        return {
            "input_ids": parent["input_ids"] + [token_id] * unique_len,
            "decode_len": decode_len,
        }

    num_single = num_nodes // 2
    num_grouped = num_nodes - num_single

    nodes = [{"input_ids": [37] * 117, "decode_len": 217}]

    # Phase 1: attach children one at a time, each to a random existing node.
    for _ in range(num_single):
        parent = rng.choice(nodes)
        nodes.append(make_child(parent))

    # Phase 2: attach groups of 1-10 siblings to a single random parent.
    while num_grouped > 0:
        num_branch = rng.randint(1, min(num_grouped, 10))
        parent = rng.choice(nodes)
        for _ in range(num_branch):
            nodes.append(make_child(parent))
        num_grouped -= num_branch

    rng.shuffle(nodes)
    return nodes
def run_test(base_url, nodes):
    """Post all ``nodes`` to the server as one batched ``/generate`` request.

    Args:
        base_url: Base URL of the server; ``"/generate"`` is appended to it.
        nodes: List of request dicts, each with ``"input_ids"`` and
            ``"decode_len"``. ``"decode_len"`` becomes ``max_new_tokens``;
            ``temperature`` is fixed at 0.

    Raises:
        AssertionError: If the response status code is not 200.
    """
    data = {
        "input_ids": [node["input_ids"] for node in nodes],
        "sampling_params": [
            {"max_new_tokens": node["decode_len"], "temperature": 0}
            for node in nodes
        ],
    }

    res = requests.post(base_url + "/generate", json=data)
    # Include status and a slice of the body so a failing run is diagnosable
    # from the test log alone.
    assert (
        res.status_code == 200
    ), f"/generate returned HTTP {res.status_code}: {res.text[:500]}"
class
TestRadixCacheFCFS
(
CustomTestCase
):
@
classmethod
def
setUpClass
(
cls
):
...
...
@@ -85,8 +37,7 @@ class TestRadixCacheFCFS(CustomTestCase):
kill_process_tree
(
cls
.
process
.
pid
)
def
test_radix_attention
(
self
):
nodes
=
gen_radix_tree
()
run_test
(
self
.
base_url
,
nodes
)
run_radix_attention_test
(
self
.
base_url
)
@
unittest
.
skipIf
(
is_in_ci
(),
"To reduce the CI execution time."
)
...
...
@@ -132,5 +83,5 @@ class TestRadixCacheNonOverlapLPM(TestRadixCacheFCFS):
if
__name__
==
"__main__"
:
os
.
environ
[
"
SGLANG_TEST_RETRACT
"
]
=
"t
rue
"
envs
.
SGLANG_TEST_RETRACT
.
set
(
T
rue
)
unittest
.
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment