change / sglang · Commits · ba589b88

Unverified commit ba589b88, authored Jun 13, 2025 by Lianmin Zheng, committed by GitHub on Jun 13, 2025.

Improve test cases for eagle infer (#7173)

Parent: 50876abc

Showing 3 changed files with 46 additions and 34 deletions (+46 / -34). Whitespace-only changes are hidden below.
test/srt/run_suite.py             +2   -2
test/srt/test_eagle_infer_a.py    +1   -1
test/srt/test_eagle_infer_b.py    +43  -31
test/srt/run_suite.py  (view file @ ba589b88)

@@ -31,8 +31,8 @@ suites = {
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
         TestFile("test_chunked_prefill.py", 313),
-        TestFile("test_eagle_infer_a.py", 300),
-        TestFile("test_eagle_infer_b.py", 300),
+        TestFile("test_eagle_infer_a.py", 370),
+        TestFile("test_eagle_infer_b.py", 270),
         TestFile("test_ebnf_constrained.py", 108),
         TestFile("test_enable_thinking.py", 70),
         TestFile("test_embedding_openai_server.py", 141),
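Note: the second field of each TestFile entry is presumably a per-file runtime estimate in seconds used to budget the suite; this commit raises the figure for test_eagle_infer_a.py (300 to 370) and lowers it for test_eagle_infer_b.py (300 to 270). Below is a minimal sketch of how such a registry can be consumed, assuming a TestFile(name, estimated_time) pair; the helper is hypothetical and not part of run_suite.py.

from collections import namedtuple

# Hypothetical stand-in for the registry entries touched above.
TestFile = namedtuple("TestFile", ["name", "estimated_time"])

suite = [
    TestFile("test_eagle_infer_a.py", 370),
    TestFile("test_eagle_infer_b.py", 270),
]

def estimated_runtime(files):
    # Sum the per-file estimates (seconds) to budget a CI shard.
    return sum(f.estimated_time for f in files)

print(estimated_runtime(suite))  # 640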
test/srt/test_eagle_infer_a.py  (view file @ ba589b88)

@@ -129,7 +129,7 @@ class TestEAGLEEngine(CustomTestCase):
             output["meta_info"]["completion_tokens"]
             / output["meta_info"]["e2e_latency"]
         )
-        print(f"{acc_length=}")
+        print(f"{acc_length=:.4f}, {speed=}")
         if engine.server_args.model_path == DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST:
             self.assertGreater(acc_length, 3.6)
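Note: the changed print now reports both metrics: speed is completion tokens divided by end-to-end latency (tokens per second), while acc_length is an acceptance-length statistic computed earlier in the test and not visible in this hunk. A self-contained sketch of the reporting logic, assuming an output dict shaped like the engine response used above:

def report_eagle_metrics(output, acc_length):
    # "output" is assumed to carry the meta_info fields referenced in the hunk.
    meta = output["meta_info"]
    # Throughput: generated tokens per second of end-to-end latency.
    speed = meta["completion_tokens"] / meta["e2e_latency"]
    print(f"{acc_length=:.4f}, {speed=}")
    return speed

# Example with made-up numbers: 128 tokens in 0.8 s -> 160.0 tokens/s.
report_eagle_metrics(
    {"meta_info": {"completion_tokens": 128, "e2e_latency": 0.8}}, acc_length=3.71
)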
test/srt/test_eagle_infer_b.py  (view file @ ba589b88)

@@ -10,7 +10,6 @@ from types import SimpleNamespace
 import numpy as np
 import requests
-import torch
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval
@@ -24,10 +23,6 @@ from sglang.test.test_utils import (
     run_logprob_check,
 )
 
-torch_dtype = torch.float16
-prefill_tolerance = 5e-2
-decode_tolerance: float = 5e-2
-
 
 class TestEAGLEServer(CustomTestCase):
     PROMPTS = [
@@ -202,7 +197,11 @@ class TestEAGLEServer(CustomTestCase):
         """Test the output logprobs are close to the input logprobs if we run a prefill again."""
 
         def run_generate(
-            prompt, return_logprob=False, max_new_tokens=512, logprob_start_len=-1
+            prompt,
+            return_logprob=False,
+            max_new_tokens=512,
+            logprob_start_len=-1,
+            temperature=1.0,
         ):
             if isinstance(prompt, str):
@@ -215,45 +214,58 @@ class TestEAGLEServer(CustomTestCase):
                 json={
                     **prompt_kwargs,
                     "sampling_params": {
-                        "temperature": 1.0,
+                        "temperature": temperature,
                         "max_new_tokens": max_new_tokens,
                         "ignore_eos": True,
                     },
                     "return_logprob": return_logprob,
                     "return_text_in_logprobs": True,
                     "logprob_start_len": logprob_start_len,
+                    "temp_scaled_logprobs": True,
                 },
             )
             return response.json()
 
         prompt = "I have a very good idea on how to"
 
-        gen = run_generate(prompt, return_logprob=True, logprob_start_len=0)
-        output_logprobs = np.array(
-            [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
-        )
-        num_prompts_tokens = gen["meta_info"]["prompt_tokens"]
-
-        input_tokens = [x[1] for x in gen["meta_info"]["input_token_logprobs"]]
-        output_tokens = [x[1] for x in gen["meta_info"]["output_token_logprobs"]]
-
-        new_prompt = input_tokens + output_tokens
-        score = run_generate(
-            new_prompt, return_logprob=True, logprob_start_len=0, max_new_tokens=0
-        )
-        output_logprobs_score = np.array(
-            [
-                x[0]
-                for x in score["meta_info"]["input_token_logprobs"][num_prompts_tokens:]
-            ]
-        )
+        for temperature in [1.0]:
+            gen = run_generate(
+                prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                temperature=temperature,
+            )
+            output_logprobs = np.array(
+                [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
+            )
+            num_prompts_tokens = gen["meta_info"]["prompt_tokens"]
+
+            input_tokens = [x[1] for x in gen["meta_info"]["input_token_logprobs"]]
+            output_tokens = [x[1] for x in gen["meta_info"]["output_token_logprobs"]]
+
+            new_prompt = input_tokens + output_tokens
+            score = run_generate(
+                new_prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                max_new_tokens=0,
+                temperature=temperature,
+            )
+            output_logprobs_score = np.array(
+                [
+                    x[0]
+                    for x in score["meta_info"]["input_token_logprobs"][
+                        num_prompts_tokens:
+                    ]
+                ]
+            )
 
             print(f"{output_logprobs[-10:]=}")
             print(f"{output_logprobs_score[-10:]=}")
 
             diff = np.abs(output_logprobs - output_logprobs_score)
             max_diff = np.max(diff)
-            self.assertLess(max_diff, 0.25)
+            self.assertLess(max_diff, 0.255)
 
     def test_logprob_mixed(self):
         args = []
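Note: this last hunk wraps the logprob-consistency check in a temperature loop: generate once with return_logprob, then re-score the prompt plus the generated tokens with max_new_tokens=0 and compare the two sets of logprobs, now with temp_scaled_logprobs enabled and a slightly looser tolerance (0.25 to 0.255). A standalone sketch of the same check against a running server; the base URL is an assumption, and the request shape (including the "input_ids" key for token-id prompts) mirrors the hunk above:

import numpy as np
import requests

BASE_URL = "http://127.0.0.1:30000"  # assumed local sglang server; adjust as needed

def generate(prompt, max_new_tokens, temperature, logprob_start_len=0):
    # Token-id prompts are assumed to go under "input_ids", string prompts under "text".
    prompt_kwargs = {"input_ids": prompt} if isinstance(prompt, list) else {"text": prompt}
    resp = requests.post(
        f"{BASE_URL}/generate",
        json={
            **prompt_kwargs,
            "sampling_params": {
                "temperature": temperature,
                "max_new_tokens": max_new_tokens,
                "ignore_eos": True,
            },
            "return_logprob": True,
            "return_text_in_logprobs": True,
            "logprob_start_len": logprob_start_len,
            "temp_scaled_logprobs": True,
        },
    )
    return resp.json()

def check_prefill_consistency(prompt, temperature=1.0, tolerance=0.255):
    gen = generate(prompt, max_new_tokens=512, temperature=temperature)
    meta = gen["meta_info"]
    output_logprobs = np.array([x[0] for x in meta["output_token_logprobs"]])
    num_prompt_tokens = meta["prompt_tokens"]

    # Re-score prompt + generated tokens with a zero-length generation.
    new_prompt = [x[1] for x in meta["input_token_logprobs"]] + [
        x[1] for x in meta["output_token_logprobs"]
    ]
    score = generate(new_prompt, max_new_tokens=0, temperature=temperature)
    rescored = np.array(
        [x[0] for x in score["meta_info"]["input_token_logprobs"][num_prompt_tokens:]]
    )
    assert np.max(np.abs(output_logprobs - rescored)) < tolerance

# Usage (requires a running server):
# check_prefill_consistency("I have a very good idea on how to")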