Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
6a261aac
"tests/vscode:/vscode.git/clone" did not exist on "d36b3c63d9e62eaba70b66d634eadaa56d85c362"
Unverified
Commit
6a261aac
authored
Oct 02, 2025
by
Lianmin Zheng
Committed by
GitHub
Oct 02, 2025
Browse files
Minor fixes for server_args, parallel_state, and test_deterministic.py (#11159)
parent
7ff740a6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
17 additions
and
6 deletions
+17
-6
python/sglang/srt/distributed/parallel_state.py
python/sglang/srt/distributed/parallel_state.py
+2
-1
python/sglang/srt/metrics/collector.py
python/sglang/srt/metrics/collector.py
+3
-3
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+11
-1
python/sglang/test/test_deterministic.py
python/sglang/test/test_deterministic.py
+1
-1
No files found.
python/sglang/srt/distributed/parallel_state.py
View file @
6a261aac
...
@@ -1085,7 +1085,8 @@ class GroupCoordinator:
...
@@ -1085,7 +1085,8 @@ class GroupCoordinator:
comm_group
=
metadata_group
if
tensor
.
is_cpu
else
group
comm_group
=
metadata_group
if
tensor
.
is_cpu
else
group
work
=
send_func
(
tensor
,
self
.
ranks
[
dst
],
group
=
comm_group
)
work
=
send_func
(
tensor
,
self
.
ranks
[
dst
],
group
=
comm_group
)
p2p_works
.
append
(
P2PWork
(
work
,
tensor
))
if
async_send
:
p2p_works
.
append
(
P2PWork
(
work
,
tensor
))
return
p2p_works
return
p2p_works
def
recv_tensor_dict
(
def
recv_tensor_dict
(
...
...
python/sglang/srt/metrics/collector.py
View file @
6a261aac
...
@@ -809,13 +809,13 @@ class TokenizerMetricsCollector:
...
@@ -809,13 +809,13 @@ class TokenizerMetricsCollector:
def
check_time_to_first_token_straggler
(
self
,
value
:
float
)
->
bool
:
def
check_time_to_first_token_straggler
(
self
,
value
:
float
)
->
bool
:
his
=
self
.
histogram_time_to_first_token
.
labels
(
**
self
.
labels
)
his
=
self
.
histogram_time_to_first_token
.
labels
(
**
self
.
labels
)
total_observations
=
sum
(
bucket
.
_value
for
bucket
in
his
.
_buckets
)
total_observations
=
sum
(
bucket
.
_value
for
bucket
in
his
.
_buckets
)
if
total_observations
<
100
:
if
total_observations
<
100
0
:
return
False
return
False
p99_threshold
=
total_observations
*
0.99
p99
9
_threshold
=
total_observations
*
0.99
9
cumulative_count
=
0
cumulative_count
=
0
for
i
,
bucket
in
enumerate
(
his
.
_buckets
):
for
i
,
bucket
in
enumerate
(
his
.
_buckets
):
cumulative_count
+=
bucket
.
_value
cumulative_count
+=
bucket
.
_value
if
cumulative_count
>
p99_threshold
:
if
cumulative_count
>
p99
9
_threshold
:
return
value
>=
his
.
_upper_bounds
[
i
]
return
value
>=
his
.
_upper_bounds
[
i
]
return
False
return
False
...
...
python/sglang/srt/server_args.py
View file @
6a261aac
...
@@ -116,6 +116,8 @@ GRAMMAR_BACKEND_CHOICES = ["xgrammar", "outlines", "llguidance", "none"]
...
@@ -116,6 +116,8 @@ GRAMMAR_BACKEND_CHOICES = ["xgrammar", "outlines", "llguidance", "none"]
DETERMINISTIC_ATTENTION_BACKEND_CHOICES
=
[
"flashinfer"
,
"fa3"
,
"triton"
]
DETERMINISTIC_ATTENTION_BACKEND_CHOICES
=
[
"flashinfer"
,
"fa3"
,
"triton"
]
RADIX_EVICTION_POLICY_CHOICES
=
[
"lru"
,
"lfu"
]
# Allow external code to add more choices
# Allow external code to add more choices
def
add_load_format_choices
(
choices
):
def
add_load_format_choices
(
choices
):
...
@@ -138,6 +140,14 @@ def add_grammar_backend_choices(choices):
...
@@ -138,6 +140,14 @@ def add_grammar_backend_choices(choices):
GRAMMAR_BACKEND_CHOICES
.
extend
(
choices
)
GRAMMAR_BACKEND_CHOICES
.
extend
(
choices
)
def
add_deterministic_attention_backend_choices
(
choices
):
DETERMINISTIC_ATTENTION_BACKEND_CHOICES
.
extend
(
choices
)
def
add_radix_eviction_policy_choices
(
choices
):
RADIX_EVICTION_POLICY_CHOICES
.
extend
(
choices
)
@
dataclasses
.
dataclass
@
dataclasses
.
dataclass
class
ServerArgs
:
class
ServerArgs
:
# Model and tokenizer
# Model and tokenizer
...
@@ -2243,7 +2253,7 @@ class ServerArgs:
...
@@ -2243,7 +2253,7 @@ class ServerArgs:
parser
.
add_argument
(
parser
.
add_argument
(
"--radix-eviction-policy"
,
"--radix-eviction-policy"
,
type
=
str
,
type
=
str
,
choices
=
[
"lru"
,
"lfu"
]
,
choices
=
RADIX_EVICTION_POLICY_CHOICES
,
default
=
ServerArgs
.
radix_eviction_policy
,
default
=
ServerArgs
.
radix_eviction_policy
,
help
=
"The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used."
,
help
=
"The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used."
,
)
)
...
...
python/sglang/test/test_deterministic.py
View file @
6a261aac
...
@@ -29,7 +29,7 @@ class BenchArgs:
...
@@ -29,7 +29,7 @@ class BenchArgs:
port
:
int
=
30000
port
:
int
=
30000
batch_size
:
int
=
1
batch_size
:
int
=
1
temperature
:
float
=
0.0
temperature
:
float
=
0.0
sampling_seed
:
int
=
None
sampling_seed
:
int
=
42
max_new_tokens
:
int
=
100
max_new_tokens
:
int
=
100
frequency_penalty
:
float
=
0.0
frequency_penalty
:
float
=
0.0
presence_penalty
:
float
=
0.0
presence_penalty
:
float
=
0.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment