"docs/advanced_features/quantization.md" did not exist on "e8e18dcdcca0e6d4eacccd074bea9da2ad6a3e18"
Unverified commit 24f7cb1e, authored by Zhihao Zhang, committed by GitHub
Browse files

[speculative decoding] rename lookahead to ngram (#11010)


Co-authored-by: a4zhangfei <a4zhangfei@qq.com>
parent e05555fa
...@@ -79,7 +79,7 @@ suites = { ...@@ -79,7 +79,7 @@ suites = {
TestFile("test_hidden_states.py", 55), TestFile("test_hidden_states.py", 55),
TestFile("test_hybrid_attn_backend.py", 100), TestFile("test_hybrid_attn_backend.py", 100),
TestFile("test_standalone_speculative_decoding.py", 250), TestFile("test_standalone_speculative_decoding.py", 250),
TestFile("test_lookahead_speculative_decoding.py", 250), TestFile("test_ngram_speculative_decoding.py", 250),
TestFile("test_input_embeddings.py", 38), TestFile("test_input_embeddings.py", 38),
TestFile("test_io_struct.py", 8), TestFile("test_io_struct.py", 8),
TestFile("test_jinja_template_utils.py", 1), TestFile("test_jinja_template_utils.py", 1),
......
...@@ -7,7 +7,7 @@ import requests ...@@ -7,7 +7,7 @@ import requests
from sglang.srt.utils import kill_process_tree from sglang.srt.utils import kill_process_tree
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
from sglang.test.test_utils import ( from sglang.test.test_utils import (
DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST, DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST,
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST, DEFAULT_URL_FOR_TEST,
CustomTestCase, CustomTestCase,
...@@ -23,7 +23,7 @@ DEFAULT_SERVER_ARGS = [ ...@@ -23,7 +23,7 @@ DEFAULT_SERVER_ARGS = [
"--cuda-graph-max-bs", "--cuda-graph-max-bs",
"8", "8",
"--speculative-algorithm", "--speculative-algorithm",
"LOOKAHEAD", "NGRAM",
"--speculative-num-draft-tokens", "--speculative-num-draft-tokens",
"16", "16",
"--mem-fraction-static", "--mem-fraction-static",
...@@ -33,7 +33,7 @@ DEFAULT_SERVER_ARGS = [ ...@@ -33,7 +33,7 @@ DEFAULT_SERVER_ARGS = [
class TestStandaloneSpeculativeDecodingBase(CustomTestCase): class TestStandaloneSpeculativeDecodingBase(CustomTestCase):
model = DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST model = DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST
base_url = DEFAULT_URL_FOR_TEST base_url = DEFAULT_URL_FOR_TEST
accuracy_threshold = 0.79 # derived tests need to override this accuracy_threshold = 0.79 # derived tests need to override this
spec_decode_threshold = 1.8 # derived spec decoding tests need to override this spec_decode_threshold = 1.8 # derived spec decoding tests need to override this
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment