Unverified commit 8491c794, authored by Liangsheng Yin, committed by GitHub
Browse files

[misc] dependencies & environment flag (#12113)

parent bda3758f
......@@ -81,7 +81,6 @@ modelopt = ["nvidia-modelopt"]
test = [
"accelerate",
"expecttest",
"gguf",
"jsonlines",
"matplotlib",
"pandas",
......
......@@ -231,6 +231,7 @@ class Envs:
SGLANG_TRITON_DECODE_SPLIT_TILE_SIZE = EnvInt(256)
# Overlap Spec V2
SGLANG_ENABLE_SPEC_V2 = EnvBool(False)
SGLANG_ENABLE_OVERLAP_PLAN_STREAM = EnvBool(False)
# VLM
......
......@@ -27,6 +27,7 @@ from typing import Dict, List, Literal, Optional, Union
import orjson
from sglang.srt.connector import ConnectorType
from sglang.srt.environ import envs
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.parser.reasoning_parser import ReasoningParser
......@@ -342,7 +343,6 @@ class ServerArgs:
nsa_decode_backend: str = "fa3"
# Speculative decoding
enable_beta_spec: bool = False
speculative_algorithm: Optional[str] = None
speculative_draft_model_path: Optional[str] = None
speculative_draft_model_revision: Optional[str] = None
......@@ -1431,13 +1431,16 @@ class ServerArgs:
"Max running requests is reset to 48 for speculative decoding. You can override this by explicitly setting --max-running-requests."
)
if self.speculative_algorithm == "EAGLE" and self.enable_beta_spec:
if (
self.speculative_algorithm == "EAGLE"
and envs.SGLANG_ENABLE_SPEC_V2.get()
):
self.disable_overlap_schedule = False
logger.warning(
"Beta spec is enabled for eagle speculative decoding and overlap schedule is turned on."
)
if not self.enable_beta_spec:
if not envs.SGLANG_ENABLE_SPEC_V2.get():
self.disable_overlap_schedule = True
logger.warning(
"Overlap scheduler is disabled because of using eagle3 or standalone speculative decoding."
......@@ -2573,7 +2576,6 @@ class ServerArgs:
)
# Speculative decoding
parser.add_argument("--enable-beta-spec", action="store_true")
parser.add_argument(
"--speculative-algorithm",
type=str,
......
import unittest
from types import SimpleNamespace
from sglang.srt.environ import envs
from sglang.srt.utils import kill_process_tree
from sglang.test.few_shot_gsm8k import run_eval
from sglang.test.kit_matched_stop import MatchedStopMixin
......@@ -29,7 +30,6 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
def setUpClass(cls):
cls.base_url = DEFAULT_URL_FOR_TEST
launch_args = [
"--enable-beta-spec",
"--trust-remote-code",
"--attention-backend",
cls.attention_backend,
......@@ -53,12 +53,13 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
*[str(i) for i in range(1, cls.max_running_requests + 1)],
]
launch_args.extend(cls.other_launch_args)
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=launch_args,
)
with envs.SGLANG_ENABLE_SPEC_V2.override(True):
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=launch_args,
)
@classmethod
def tearDownClass(cls):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment