Unverified Commit ba589b88 authored by Lianmin Zheng, committed by GitHub

Improve test cases for eagle infer (#7173)

parent 50876abc
@@ -31,8 +31,8 @@ suites = {
         TestFile("test_block_int8.py", 22),
         TestFile("test_create_kvindices.py", 2),
         TestFile("test_chunked_prefill.py", 313),
-        TestFile("test_eagle_infer_a.py", 300),
-        TestFile("test_eagle_infer_b.py", 300),
+        TestFile("test_eagle_infer_a.py", 370),
+        TestFile("test_eagle_infer_b.py", 270),
         TestFile("test_ebnf_constrained.py", 108),
         TestFile("test_enable_thinking.py", 70),
         TestFile("test_embedding_openai_server.py", 141),
@@ -129,7 +129,7 @@ class TestEAGLEEngine(CustomTestCase):
             output["meta_info"]["completion_tokens"]
             / output["meta_info"]["e2e_latency"]
         )
-        print(f"{acc_length=}")
+        print(f"{acc_length=:.4f}, {speed=}")
         if engine.server_args.model_path == DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST:
             self.assertGreater(acc_length, 3.6)
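For context on the hunk above: speed is output tokens per second over end-to-end latency, and acc_length is the average number of tokens accepted per speculative verify step. A minimal sketch of how both could be derived from a single generation result, assuming meta_info carries completion_tokens, e2e_latency, and a verify-step counter named spec_verify_ct (that last field name is an assumption, not taken from this diff):

def speculative_stats(output):
    # Hypothetical helper (not part of this PR): derive decode throughput and
    # acceptance length from one generation result of an EAGLE-enabled server.
    meta = output["meta_info"]
    speed = meta["completion_tokens"] / meta["e2e_latency"]  # output tokens per second
    # Assumed field: number of draft/verify rounds performed during decoding.
    acc_length = meta["completion_tokens"] / meta["spec_verify_ct"]
    return acc_length, speed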
@@ -10,7 +10,6 @@ from types import SimpleNamespace
 import numpy as np
 import requests
-import torch
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval
@@ -24,10 +23,6 @@ from sglang.test.test_utils import (
     run_logprob_check,
 )
-torch_dtype = torch.float16
-prefill_tolerance = 5e-2
-decode_tolerance: float = 5e-2
 class TestEAGLEServer(CustomTestCase):
     PROMPTS = [
@@ -202,7 +197,11 @@ class TestEAGLEServer(CustomTestCase):
         """Test the output logprobs are close to the input logprobs if we run a prefill again."""
         def run_generate(
-            prompt, return_logprob=False, max_new_tokens=512, logprob_start_len=-1
+            prompt,
+            return_logprob=False,
+            max_new_tokens=512,
+            logprob_start_len=-1,
+            temperature=1.0,
         ):
             if isinstance(prompt, str):
@@ -215,20 +214,27 @@ class TestEAGLEServer(CustomTestCase):
                 json={
                     **prompt_kwargs,
                     "sampling_params": {
-                        "temperature": 1.0,
+                        "temperature": temperature,
                         "max_new_tokens": max_new_tokens,
                         "ignore_eos": True,
                     },
                     "return_logprob": return_logprob,
                     "return_text_in_logprobs": True,
                     "logprob_start_len": logprob_start_len,
+                    "temp_scaled_logprobs": True,
                 },
             )
             return response.json()
         prompt = "I have a very good idea on how to"
-        gen = run_generate(prompt, return_logprob=True, logprob_start_len=0)
+        for temperature in [1.0]:
+            gen = run_generate(
+                prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                temperature=temperature,
+            )
             output_logprobs = np.array(
                 [x[0] for x in gen["meta_info"]["output_token_logprobs"]]
             )
@@ -239,12 +245,18 @@ class TestEAGLEServer(CustomTestCase):
             new_prompt = input_tokens + output_tokens
             score = run_generate(
-                new_prompt, return_logprob=True, logprob_start_len=0, max_new_tokens=0
+                new_prompt,
+                return_logprob=True,
+                logprob_start_len=0,
+                max_new_tokens=0,
+                temperature=temperature,
             )
             output_logprobs_score = np.array(
                 [
                     x[0]
-                    for x in score["meta_info"]["input_token_logprobs"][num_prompts_tokens:]
+                    for x in score["meta_info"]["input_token_logprobs"][
+                        num_prompts_tokens:
+                    ]
                 ]
             )
@@ -253,7 +265,7 @@ class TestEAGLEServer(CustomTestCase):
             diff = np.abs(output_logprobs - output_logprobs_score)
             max_diff = np.max(diff)
-            self.assertLess(max_diff, 0.25)
+            self.assertLess(max_diff, 0.255)
     def test_logprob_mixed(self):
         args = []
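The hunks above all belong to the prefill-logprob consistency test: generate once with logprobs returned, then re-prefill the prompt plus the generated tokens with max_new_tokens=0 and check that the scored logprobs match the ones recorded while decoding. Below is a standalone sketch of that flow against a locally running sglang server; the base URL, the 32-token budget, and the (logprob, token_id, text) tuple layout are assumptions, while the request fields mirror the test body above.

import numpy as np
import requests

BASE_URL = "http://127.0.0.1:30000"  # assumed address of a locally running sglang server

def generate(prompt_kwargs, temperature=1.0, max_new_tokens=32):
    # Request body mirrors the test above; temp_scaled_logprobs asks the server
    # to return logprobs scaled by the sampling temperature.
    response = requests.post(
        f"{BASE_URL}/generate",
        json={
            **prompt_kwargs,
            "sampling_params": {
                "temperature": temperature,
                "max_new_tokens": max_new_tokens,
                "ignore_eos": True,
            },
            "return_logprob": True,
            "return_text_in_logprobs": True,
            "logprob_start_len": 0,
            "temp_scaled_logprobs": True,
        },
    )
    return response.json()

# 1) Decode some tokens and record their logprobs.
gen = generate({"text": "I have a very good idea on how to"})
output_logprobs = np.array([x[0] for x in gen["meta_info"]["output_token_logprobs"]])

# 2) Re-run prefill over prompt + generated tokens without decoding any new ones.
input_ids = [x[1] for x in gen["meta_info"]["input_token_logprobs"]]
output_ids = [x[1] for x in gen["meta_info"]["output_token_logprobs"]]
score = generate({"input_ids": input_ids + output_ids}, max_new_tokens=0)
score_logprobs = np.array(
    [x[0] for x in score["meta_info"]["input_token_logprobs"][len(input_ids):]]
)

# 3) The two passes should agree closely, which is what the 0.255 bound above checks.
print("max abs diff:", np.max(np.abs(output_logprobs - score_logprobs)))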