sglang · commit ba589b88 (unverified)
Authored Jun 13, 2025 by Lianmin Zheng; committed by GitHub, Jun 13, 2025
Improve test cases for eagle infer (#7173)
Parent: 50876abc

Showing 3 changed files with 46 additions and 34 deletions:

    test/srt/run_suite.py             +2   -2
    test/srt/test_eagle_infer_a.py    +1   -1
    test/srt/test_eagle_infer_b.py    +43  -31
test/srt/run_suite.py

@@ -31,8 +31,8 @@ suites = {
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
         TestFile("test_chunked_prefill.py", 313),
-        TestFile("test_eagle_infer_a.py", 300),
-        TestFile("test_eagle_infer_b.py", 300),
+        TestFile("test_eagle_infer_a.py", 370),
+        TestFile("test_eagle_infer_b.py", 270),
         TestFile("test_ebnf_constrained.py", 108),
         TestFile("test_enable_thinking.py", 70),
         TestFile("test_embedding_openai_server.py", 141),
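The integer next to each file name appears to be a per-file time budget in seconds, so this change re-balances the two eagle-infer entries rather than adding tests here. A minimal sketch of how such entries could be consumed; the TestFile field names and the scheduling logic below are assumptions for illustration, not taken from run_suite.py:

from dataclasses import dataclass


@dataclass
class TestFile:
    name: str
    estimated_time: float = 60.0  # rough wall-clock budget in seconds (assumed field name)


per_commit = [
    TestFile("test_eagle_infer_a.py", 370),
    TestFile("test_eagle_infer_b.py", 270),
]

# A suite runner could sum the estimates to budget or partition a CI shard.
print(sum(t.estimated_time for t in per_commit), "seconds estimated")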
test/srt/test_eagle_infer_a.py

@@ -129,7 +129,7 @@ class TestEAGLEEngine(CustomTestCase):
             output["meta_info"]["completion_tokens"]
             / output["meta_info"]["e2e_latency"]
         )
-        print(f"{acc_length=}")
+        print(f"{acc_length=:.4f}, {speed=}")
 
         if engine.server_args.model_path == DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST:
             self.assertGreater(acc_length, 3.6)
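The new print reports both the acceptance length and the decode speed, where speed is completion_tokens / e2e_latency, presumably in tokens per second. A tiny worked example with made-up meta_info values, not numbers from a real run:

# Illustrative numbers only; not from an actual test run.
meta_info = {"completion_tokens": 512, "e2e_latency": 1.6}  # latency in seconds

speed = meta_info["completion_tokens"] / meta_info["e2e_latency"]
acc_length = 3.8  # hypothetical average accepted length per speculative step
print(f"{acc_length=:.4f}, {speed=}")  # acc_length=3.8000, speed=320.0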
test/srt/test_eagle_infer_b.py

@@ -10,7 +10,6 @@ from types import SimpleNamespace
 
 import numpy as np
 import requests
-import torch
 
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval
@@ -24,10 +23,6 @@ from sglang.test.test_utils import (
     run_logprob_check,
 )
 
-torch_dtype = torch.float16
-prefill_tolerance = 5e-2
-decode_tolerance: float = 5e-2
-
 
 class TestEAGLEServer(CustomTestCase):
     PROMPTS = [
@@ -202,7 +197,11 @@ class TestEAGLEServer(CustomTestCase):
         """Test the output logprobs are close to the input logprobs if we run a prefill again."""
 
         def run_generate(
-            prompt, return_logprob=False, max_new_tokens=512, logprob_start_len=-1
+            prompt,
+            return_logprob=False,
+            max_new_tokens=512,
+            logprob_start_len=-1,
+            temperature=1.0,
         ):
             if isinstance(prompt, str):
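run_generate now threads a temperature through to the sampling parameters, and the next hunk additionally requests "temp_scaled_logprobs": True, presumably so the returned logprobs refer to the temperature-scaled distribution and stay comparable between the generate and re-prefill calls. For reference, a minimal numpy sketch of temperature-scaled log-probabilities as a general formula; this is an illustration, not sglang's implementation:

import numpy as np


def temp_scaled_logprob(logits, token_id, temperature=1.0):
    """log softmax(logits / temperature)[token_id], via a stable log-sum-exp."""
    scaled = np.asarray(logits) / temperature
    logsumexp = scaled.max() + np.log(np.exp(scaled - scaled.max()).sum())
    return float(scaled[token_id] - logsumexp)


logits = np.array([2.0, 1.0, 0.5])
print(temp_scaled_logprob(logits, token_id=0, temperature=1.0))
print(temp_scaled_logprob(logits, token_id=0, temperature=0.7))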
@@ -215,45 +214,58 @@ class TestEAGLEServer(CustomTestCase):
                 json={
                     **prompt_kwargs,
                     "sampling_params": {
-                        "temperature": 1.0,
+                        "temperature": temperature,
                         "max_new_tokens": max_new_tokens,
                         "ignore_eos": True,
                     },
                     "return_logprob": return_logprob,
                     "return_text_in_logprobs": True,
                     "logprob_start_len": logprob_start_len,
+                    "temp_scaled_logprobs": True,
                 },
             )
             return response.json()
 
         prompt = "I have a very good idea on how to"
-        gen = run_generate(prompt, return_logprob=True, logprob_start_len=0)
-        output_logprobs = np.array(
-            [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
-        )
-        num_prompts_tokens = gen["meta_info"]["prompt_tokens"]
-        input_tokens = [x[1] for x in gen["meta_info"]["input_token_logprobs"]]
-        output_tokens = [x[1] for x in gen["meta_info"]["output_token_logprobs"]]
-        new_prompt = input_tokens + output_tokens
-        score = run_generate(
-            new_prompt, return_logprob=True, logprob_start_len=0, max_new_tokens=0
-        )
-        output_logprobs_score = np.array(
-            [x[0] for x in score["meta_info"]["input_token_logprobs"][num_prompts_tokens:]]
-        )
-
-        print(f"{output_logprobs[-10:]=}")
-        print(f"{output_logprobs_score[-10:]=}")
-
-        diff = np.abs(output_logprobs - output_logprobs_score)
-        max_diff = np.max(diff)
-        self.assertLess(max_diff, 0.25)
+        for temperature in [1.0]:
+            gen = run_generate(
+                prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                temperature=temperature,
+            )
+            output_logprobs = np.array(
+                [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
+            )
+            num_prompts_tokens = gen["meta_info"]["prompt_tokens"]
+            input_tokens = [x[1] for x in gen["meta_info"]["input_token_logprobs"]]
+            output_tokens = [x[1] for x in gen["meta_info"]["output_token_logprobs"]]
+            new_prompt = input_tokens + output_tokens
+            score = run_generate(
+                new_prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                max_new_tokens=0,
+                temperature=temperature,
+            )
+            output_logprobs_score = np.array(
+                [
+                    x[0]
+                    for x in score["meta_info"]["input_token_logprobs"][num_prompts_tokens:]
+                ]
+            )
+
+            print(f"{output_logprobs[-10:]=}")
+            print(f"{output_logprobs_score[-10:]=}")
+
+            diff = np.abs(output_logprobs - output_logprobs_score)
+            max_diff = np.max(diff)
+            self.assertLess(max_diff, 0.255)
 
     def test_logprob_mixed(self):
         args = []
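The core idea of the test survives the refactor: generate once with logprobs enabled, feed the concatenated input and output token ids back as a prompt with max_new_tokens=0, and check that the prefill-time logprobs of the generated tokens match the logprobs recorded during decoding (now within 0.255 instead of 0.25, and for each temperature in the loop). A self-contained numpy sketch of that comparison, using made-up logprob arrays rather than real server output:

import numpy as np

# Stand-ins for the two measurements the test compares; real values would come
# from the server responses ("output_token_logprobs" from the first call vs. the
# re-prefill's "input_token_logprobs" past the original prompt length).
decode_logprobs = np.array([-1.20, -0.35, -2.10, -0.80])
prefill_logprobs = np.array([-1.18, -0.30, -2.20, -0.82])

max_diff = np.max(np.abs(decode_logprobs - prefill_logprobs))
assert max_diff < 0.255, f"logprobs diverged: {max_diff=}"
print(f"{max_diff=}")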