Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
6a261aac
Unverified
Commit
6a261aac
authored
Oct 02, 2025
by
Lianmin Zheng
Committed by
GitHub
Oct 02, 2025
Browse files
Minor fixes for server_args, parallel_state, and test_deterministic.py (#11159)
parent
7ff740a6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
6 deletions
+17
-6
python/sglang/srt/distributed/parallel_state.py
python/sglang/srt/distributed/parallel_state.py
+2
-1
python/sglang/srt/metrics/collector.py
python/sglang/srt/metrics/collector.py
+3
-3
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+11
-1
python/sglang/test/test_deterministic.py
python/sglang/test/test_deterministic.py
+1
-1
No files found.
python/sglang/srt/distributed/parallel_state.py
View file @
6a261aac
...
...
@@ -1085,7 +1085,8 @@ class GroupCoordinator:
comm_group
=
metadata_group
if
tensor
.
is_cpu
else
group
work
=
send_func
(
tensor
,
self
.
ranks
[
dst
],
group
=
comm_group
)
p2p_works
.
append
(
P2PWork
(
work
,
tensor
))
if
async_send
:
p2p_works
.
append
(
P2PWork
(
work
,
tensor
))
return
p2p_works
def
recv_tensor_dict
(
...
...
python/sglang/srt/metrics/collector.py
View file @
6a261aac
...
...
@@ -809,13 +809,13 @@ class TokenizerMetricsCollector:
def
check_time_to_first_token_straggler
(
self
,
value
:
float
)
->
bool
:
his
=
self
.
histogram_time_to_first_token
.
labels
(
**
self
.
labels
)
total_observations
=
sum
(
bucket
.
_value
for
bucket
in
his
.
_buckets
)
if
total_observations
<
100
:
if
total_observations
<
100
0
:
return
False
p99_threshold
=
total_observations
*
0.99
p99
9
_threshold
=
total_observations
*
0.99
9
cumulative_count
=
0
for
i
,
bucket
in
enumerate
(
his
.
_buckets
):
cumulative_count
+=
bucket
.
_value
if
cumulative_count
>
p99_threshold
:
if
cumulative_count
>
p99
9
_threshold
:
return
value
>=
his
.
_upper_bounds
[
i
]
return
False
...
...
python/sglang/srt/server_args.py
View file @
6a261aac
...
...
@@ -116,6 +116,8 @@ GRAMMAR_BACKEND_CHOICES = ["xgrammar", "outlines", "llguidance", "none"]
DETERMINISTIC_ATTENTION_BACKEND_CHOICES
=
[
"flashinfer"
,
"fa3"
,
"triton"
]
RADIX_EVICTION_POLICY_CHOICES
=
[
"lru"
,
"lfu"
]
# Allow external code to add more choices
def
add_load_format_choices
(
choices
):
...
...
@@ -138,6 +140,14 @@ def add_grammar_backend_choices(choices):
GRAMMAR_BACKEND_CHOICES
.
extend
(
choices
)
def
add_deterministic_attention_backend_choices
(
choices
):
DETERMINISTIC_ATTENTION_BACKEND_CHOICES
.
extend
(
choices
)
def
add_radix_eviction_policy_choices
(
choices
):
RADIX_EVICTION_POLICY_CHOICES
.
extend
(
choices
)
@
dataclasses
.
dataclass
class
ServerArgs
:
# Model and tokenizer
...
...
@@ -2243,7 +2253,7 @@ class ServerArgs:
parser
.
add_argument
(
"--radix-eviction-policy"
,
type
=
str
,
choices
=
[
"lru"
,
"lfu"
]
,
choices
=
RADIX_EVICTION_POLICY_CHOICES
,
default
=
ServerArgs
.
radix_eviction_policy
,
help
=
"The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used."
,
)
...
...
python/sglang/test/test_deterministic.py
View file @
6a261aac
...
...
@@ -29,7 +29,7 @@ class BenchArgs:
port
:
int
=
30000
batch_size
:
int
=
1
temperature
:
float
=
0.0
sampling_seed
:
int
=
None
sampling_seed
:
int
=
42
max_new_tokens
:
int
=
100
frequency_penalty
:
float
=
0.0
presence_penalty
:
float
=
0.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment