Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
8491c794
Unverified
Commit
8491c794
authored
Oct 26, 2025
by
Liangsheng Yin
Committed by
GitHub
Oct 26, 2025
Browse files
[misc] dependencies & environment flag (#12113)
parent
bda3758f
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
15 additions
and
12 deletions
+15
-12
python/pyproject.toml
python/pyproject.toml
+0
-1
python/sglang/srt/environ.py
python/sglang/srt/environ.py
+1
-0
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-4
test/srt/test_eagle_infer_beta.py
test/srt/test_eagle_infer_beta.py
+8
-7
No files found.
python/pyproject.toml
View file @
8491c794
...
@@ -81,7 +81,6 @@ modelopt = ["nvidia-modelopt"]
...
@@ -81,7 +81,6 @@ modelopt = ["nvidia-modelopt"]
test
=
[
test
=
[
"accelerate"
,
"accelerate"
,
"expecttest"
,
"expecttest"
,
"gguf"
,
"jsonlines"
,
"jsonlines"
,
"matplotlib"
,
"matplotlib"
,
"pandas"
,
"pandas"
,
...
...
python/sglang/srt/environ.py
View file @
8491c794
...
@@ -231,6 +231,7 @@ class Envs:
...
@@ -231,6 +231,7 @@ class Envs:
SGLANG_TRITON_DECODE_SPLIT_TILE_SIZE
=
EnvInt
(
256
)
SGLANG_TRITON_DECODE_SPLIT_TILE_SIZE
=
EnvInt
(
256
)
# Overlap Spec V2
# Overlap Spec V2
SGLANG_ENABLE_SPEC_V2
=
EnvBool
(
False
)
SGLANG_ENABLE_OVERLAP_PLAN_STREAM
=
EnvBool
(
False
)
SGLANG_ENABLE_OVERLAP_PLAN_STREAM
=
EnvBool
(
False
)
# VLM
# VLM
...
...
python/sglang/srt/server_args.py
View file @
8491c794
...
@@ -27,6 +27,7 @@ from typing import Dict, List, Literal, Optional, Union
...
@@ -27,6 +27,7 @@ from typing import Dict, List, Literal, Optional, Union
import
orjson
import
orjson
from
sglang.srt.connector
import
ConnectorType
from
sglang.srt.connector
import
ConnectorType
from
sglang.srt.environ
import
envs
from
sglang.srt.function_call.function_call_parser
import
FunctionCallParser
from
sglang.srt.function_call.function_call_parser
import
FunctionCallParser
from
sglang.srt.lora.lora_registry
import
LoRARef
from
sglang.srt.lora.lora_registry
import
LoRARef
from
sglang.srt.parser.reasoning_parser
import
ReasoningParser
from
sglang.srt.parser.reasoning_parser
import
ReasoningParser
...
@@ -342,7 +343,6 @@ class ServerArgs:
...
@@ -342,7 +343,6 @@ class ServerArgs:
nsa_decode_backend
:
str
=
"fa3"
nsa_decode_backend
:
str
=
"fa3"
# Speculative decoding
# Speculative decoding
enable_beta_spec
:
bool
=
False
speculative_algorithm
:
Optional
[
str
]
=
None
speculative_algorithm
:
Optional
[
str
]
=
None
speculative_draft_model_path
:
Optional
[
str
]
=
None
speculative_draft_model_path
:
Optional
[
str
]
=
None
speculative_draft_model_revision
:
Optional
[
str
]
=
None
speculative_draft_model_revision
:
Optional
[
str
]
=
None
...
@@ -1431,13 +1431,16 @@ class ServerArgs:
...
@@ -1431,13 +1431,16 @@ class ServerArgs:
"Max running requests is reset to 48 for speculative decoding. You can override this by explicitly setting --max-running-requests."
"Max running requests is reset to 48 for speculative decoding. You can override this by explicitly setting --max-running-requests."
)
)
if
self
.
speculative_algorithm
==
"EAGLE"
and
self
.
enable_beta_spec
:
if
(
self
.
speculative_algorithm
==
"EAGLE"
and
envs
.
SGLANG_ENABLE_SPEC_V2
.
get
()
):
self
.
disable_overlap_schedule
=
False
self
.
disable_overlap_schedule
=
False
logger
.
warning
(
logger
.
warning
(
"Beta spec is enabled for eagle speculative decoding and overlap schedule is turned on."
"Beta spec is enabled for eagle speculative decoding and overlap schedule is turned on."
)
)
if
not
self
.
enable_beta_spec
:
if
not
envs
.
SGLANG_ENABLE_SPEC_V2
.
get
()
:
self
.
disable_overlap_schedule
=
True
self
.
disable_overlap_schedule
=
True
logger
.
warning
(
logger
.
warning
(
"Overlap scheduler is disabled because of using eagle3 or standalone speculative decoding."
"Overlap scheduler is disabled because of using eagle3 or standalone speculative decoding."
...
@@ -2573,7 +2576,6 @@ class ServerArgs:
...
@@ -2573,7 +2576,6 @@ class ServerArgs:
)
)
# Speculative decoding
# Speculative decoding
parser
.
add_argument
(
"--enable-beta-spec"
,
action
=
"store_true"
)
parser
.
add_argument
(
parser
.
add_argument
(
"--speculative-algorithm"
,
"--speculative-algorithm"
,
type
=
str
,
type
=
str
,
...
...
test/srt/test_eagle_infer_beta.py
View file @
8491c794
import
unittest
import
unittest
from
types
import
SimpleNamespace
from
types
import
SimpleNamespace
from
sglang.srt.environ
import
envs
from
sglang.srt.utils
import
kill_process_tree
from
sglang.srt.utils
import
kill_process_tree
from
sglang.test.few_shot_gsm8k
import
run_eval
from
sglang.test.few_shot_gsm8k
import
run_eval
from
sglang.test.kit_matched_stop
import
MatchedStopMixin
from
sglang.test.kit_matched_stop
import
MatchedStopMixin
...
@@ -29,7 +30,6 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
...
@@ -29,7 +30,6 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
launch_args
=
[
launch_args
=
[
"--enable-beta-spec"
,
"--trust-remote-code"
,
"--trust-remote-code"
,
"--attention-backend"
,
"--attention-backend"
,
cls
.
attention_backend
,
cls
.
attention_backend
,
...
@@ -53,12 +53,13 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
...
@@ -53,12 +53,13 @@ class TestEagleServerBase(CustomTestCase, MatchedStopMixin):
*
[
str
(
i
)
for
i
in
range
(
1
,
cls
.
max_running_requests
+
1
)],
*
[
str
(
i
)
for
i
in
range
(
1
,
cls
.
max_running_requests
+
1
)],
]
]
launch_args
.
extend
(
cls
.
other_launch_args
)
launch_args
.
extend
(
cls
.
other_launch_args
)
cls
.
process
=
popen_launch_server
(
with
envs
.
SGLANG_ENABLE_SPEC_V2
.
override
(
True
):
cls
.
model
,
cls
.
process
=
popen_launch_server
(
cls
.
base_url
,
cls
.
model
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
cls
.
base_url
,
other_args
=
launch_args
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
)
other_args
=
launch_args
,
)
@
classmethod
@
classmethod
def
tearDownClass
(
cls
):
def
tearDownClass
(
cls
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment