zhaoyu6 / sglang · Commits · ba589b88

Unverified commit ba589b88, authored Jun 13, 2025 by Lianmin Zheng, committed by GitHub on Jun 13, 2025

Improve test cases for eagle infer (#7173)

Parent: 50876abc
Showing 3 changed files with 46 additions and 34 deletions:

  test/srt/run_suite.py           +2   -2
  test/srt/test_eagle_infer_a.py  +1   -1
  test/srt/test_eagle_infer_b.py  +43  -31
test/srt/run_suite.py

@@ -31,8 +31,8 @@ suites = {
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
         TestFile("test_chunked_prefill.py", 313),
-        TestFile("test_eagle_infer_a.py", 300),
+        TestFile("test_eagle_infer_a.py", 370),
-        TestFile("test_eagle_infer_b.py", 300),
+        TestFile("test_eagle_infer_b.py", 270),
         TestFile("test_ebnf_constrained.py", 108),
         TestFile("test_enable_thinking.py", 70),
         TestFile("test_embedding_openai_server.py", 141),
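For orientation: the second argument of each TestFile entry appears to be a per-file time estimate in seconds, which this commit raises for test_eagle_infer_a.py (300 -> 370) and lowers for test_eagle_infer_b.py (300 -> 270). The sketch below only illustrates how such estimates could be summed into a suite time budget; the TestFile dataclass and its field names here are assumptions for illustration, not the actual run_suite.py definitions:

# Minimal sketch with assumed field names, not the real run_suite.py TestFile.
from dataclasses import dataclass

@dataclass
class TestFile:
    name: str
    estimated_time: float = 60.0  # seconds; assumed default

per_commit = [
    TestFile("test_eagle_infer_a.py", 370),
    TestFile("test_eagle_infer_b.py", 270),
]

budget = sum(t.estimated_time for t in per_commit)
print(f"estimated wall time for these two files: {budget:.0f} s")  # 640 s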
test/srt/test_eagle_infer_a.py

@@ -129,7 +129,7 @@ class TestEAGLEEngine(CustomTestCase):
             output["meta_info"]["completion_tokens"]
             / output["meta_info"]["e2e_latency"]
         )
-        print(f"{acc_length=}")
+        print(f"{acc_length=:.4f}, {speed=}")

         if engine.server_args.model_path == DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST:
             self.assertGreater(acc_length, 3.6)
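The updated print adds the decoding speed (completion tokens divided by end-to-end latency) and pins the acceptance length to four decimals. A standalone restatement with made-up numbers, purely to show the f-string "=" and ":.4f" specifiers at work:

# Illustration only; the values are invented, not taken from the test.
output = {"meta_info": {"completion_tokens": 512, "e2e_latency": 2.0}}
acc_length = 3.8125  # hypothetical average accepted tokens per draft step
speed = (
    output["meta_info"]["completion_tokens"]
    / output["meta_info"]["e2e_latency"]
)  # tokens per second
print(f"{acc_length=:.4f}, {speed=}")  # prints: acc_length=3.8125, speed=256.0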
test/srt/test_eagle_infer_b.py

@@ -10,7 +10,6 @@ from types import SimpleNamespace
 import numpy as np
 import requests
-import torch

 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval
@@ -24,10 +23,6 @@ from sglang.test.test_utils import (
     run_logprob_check,
 )

-torch_dtype = torch.float16
-prefill_tolerance = 5e-2
-decode_tolerance: float = 5e-2
-

 class TestEAGLEServer(CustomTestCase):
     PROMPTS = [
@@ -202,7 +197,11 @@ class TestEAGLEServer(CustomTestCase):
         """Test the output logprobs are close to the input logprobs if we run a prefill again."""

         def run_generate(
-            prompt, return_logprob=False, max_new_tokens=512, logprob_start_len=-1
+            prompt,
+            return_logprob=False,
+            max_new_tokens=512,
+            logprob_start_len=-1,
+            temperature=1.0,
         ):
             if isinstance(prompt, str):
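Splitting the signature onto one parameter per line makes the new temperature keyword easy to spot; because it defaults to 1.0, existing callers keep the old behavior. A toy stand-in (not the real helper, which posts to the server) showing that property:

# Toy stand-in: adding a defaulted keyword keeps old call sites working.
def run_generate(prompt, return_logprob=False, max_new_tokens=512,
                 logprob_start_len=-1, temperature=1.0):
    # Stand-in body: just echo the resolved keyword values.
    return {"prompt": prompt, "temperature": temperature,
            "max_new_tokens": max_new_tokens,
            "return_logprob": return_logprob,
            "logprob_start_len": logprob_start_len}

print(run_generate("hi")["temperature"])                   # 1.0, old behavior preserved
print(run_generate("hi", temperature=0.7)["temperature"])  # 0.7, new override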
@@ -215,20 +214,27 @@ class TestEAGLEServer(CustomTestCase):
                 json={
                     **prompt_kwargs,
                     "sampling_params": {
-                        "temperature": 1.0,
+                        "temperature": temperature,
                         "max_new_tokens": max_new_tokens,
                         "ignore_eos": True,
                     },
                     "return_logprob": return_logprob,
                     "return_text_in_logprobs": True,
                     "logprob_start_len": logprob_start_len,
+                    "temp_scaled_logprobs": True,
                 },
             )
             return response.json()

         prompt = "I have a very good idea on how to"
-        gen = run_generate(prompt, return_logprob=True, logprob_start_len=0)
+        for temperature in [1.0]:
+            gen = run_generate(
+                prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                temperature=temperature,
+            )
             output_logprobs = np.array(
                 [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
             )
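Two things change in the request body: the sampling temperature now comes from the helper argument instead of a hard-coded 1.0, and the test additionally requests temperature-scaled logprobs via temp_scaled_logprobs; the generation step itself is wrapped in a loop over temperatures (currently just [1.0]). A self-contained sketch of the payload the helper assembles, with the endpoint call omitted and the prompt as a placeholder:

# Sketch only: keys mirror the request body in the hunk above, but this does
# not send anything to a server.
def build_payload(prompt_kwargs, temperature, max_new_tokens,
                  return_logprob, logprob_start_len):
    return {
        **prompt_kwargs,
        "sampling_params": {
            "temperature": temperature,        # was hard-coded to 1.0
            "max_new_tokens": max_new_tokens,
            "ignore_eos": True,
        },
        "return_logprob": return_logprob,
        "return_text_in_logprobs": True,
        "logprob_start_len": logprob_start_len,
        "temp_scaled_logprobs": True,          # newly requested by the test
    }

for temperature in [1.0]:  # single setting today; more can be appended later
    payload = build_payload(
        {"text": "I have a very good idea on how to"},
        temperature=temperature,
        max_new_tokens=512,
        return_logprob=True,
        logprob_start_len=0,
    )
    print(payload["sampling_params"]["temperature"])  # 1.0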
@@ -239,12 +245,18 @@ class TestEAGLEServer(CustomTestCase):
             new_prompt = input_tokens + output_tokens
             score = run_generate(
-                new_prompt, return_logprob=True, logprob_start_len=0, max_new_tokens=0
+                new_prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                max_new_tokens=0,
+                temperature=temperature,
             )
             output_logprobs_score = np.array(
                 [
                     x[0]
-                    for x in score["meta_info"]["input_token_logprobs"][num_prompts_tokens:]
+                    for x in score["meta_info"]["input_token_logprobs"][
+                        num_prompts_tokens:
+                    ]
                 ]
             )
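The re-scoring step sends the concatenated prompt-plus-output tokens back with max_new_tokens=0, so the server only prefills and returns per-token input logprobs; slicing from num_prompts_tokens onward keeps just the generated portion. A placeholder sketch of that slicing (the element layout of input_token_logprobs here is illustrative, not the exact response schema):

# Placeholder data; real entries come from the server response.
import numpy as np

num_prompts_tokens = 2  # length of the original prompt in tokens
input_token_logprobs = [(None, 11), (-0.50, 12), (-0.11, 13), (-2.30, 14)]

output_logprobs_score = np.array(
    [x[0] for x in input_token_logprobs[num_prompts_tokens:]]
)
print(output_logprobs_score)  # re-scored logprobs of the generated tokens only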
@@ -253,7 +265,7 @@ class TestEAGLEServer(CustomTestCase):
             diff = np.abs(output_logprobs - output_logprobs_score)
             max_diff = np.max(diff)
-            self.assertLess(max_diff, 0.25)
+            self.assertLess(max_diff, 0.255)

     def test_logprob_mixed(self):
         args = []
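The only behavioral change in this hunk is the slightly relaxed tolerance (0.25 -> 0.255). A self-contained restatement of the final check with made-up arrays:

# Made-up values; the real arrays come from the two server calls above.
import numpy as np

output_logprobs = np.array([-0.11, -2.30, -0.05])        # from generation
output_logprobs_score = np.array([-0.10, -2.41, -0.07])  # from the prefill re-score

diff = np.abs(output_logprobs - output_logprobs_score)
max_diff = np.max(diff)
assert max_diff < 0.255, f"logprob mismatch: {max_diff=}"
print(max_diff)  # about 0.11 here, comfortably under the tolerance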