Unverified commit 24f7cb1e, authored by Zhihao Zhang, committed by GitHub
Browse files

[speculative decoding] rename lookahead to ngram (#11010)


Co-authored-by: a4zhangfei <a4zhangfei@qq.com>
parent e05555fa
......@@ -79,7 +79,7 @@ suites = {
TestFile("test_hidden_states.py", 55),
TestFile("test_hybrid_attn_backend.py", 100),
TestFile("test_standalone_speculative_decoding.py", 250),
TestFile("test_lookahead_speculative_decoding.py", 250),
TestFile("test_ngram_speculative_decoding.py", 250),
TestFile("test_input_embeddings.py", 38),
TestFile("test_io_struct.py", 8),
TestFile("test_jinja_template_utils.py", 1),
......
......@@ -7,7 +7,7 @@ import requests
from sglang.srt.utils import kill_process_tree
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import (
DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST,
DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
CustomTestCase,
......@@ -23,7 +23,7 @@ DEFAULT_SERVER_ARGS = [
"--cuda-graph-max-bs",
"8",
"--speculative-algorithm",
"LOOKAHEAD",
"NGRAM",
"--speculative-num-draft-tokens",
"16",
"--mem-fraction-static",
......@@ -33,7 +33,7 @@ DEFAULT_SERVER_ARGS = [
class TestStandaloneSpeculativeDecodingBase(CustomTestCase):
model = DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST
model = DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST
base_url = DEFAULT_URL_FOR_TEST
accuracy_threshold = 0.79 # derived tests need to override this
spec_decode_threshold = 1.8 # derived spec decoding tests need to override this
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment