Unverified commit 6b143d62, authored Oct 15, 2025 by Stefan He, committed by GitHub on Oct 15, 2025

Clean up some Qwen3-Next and deterministic code (#11585)
Parent: 6bc503af
Showing 4 changed files, with 1 addition and 15 deletions:
python/sglang/srt/configs/mamba_utils.py    +1 -1
python/sglang/srt/configs/qwen3_next.py     +0 -3
python/sglang/srt/models/falcon_h1.py       +0 -7
python/sglang/test/test_deterministic.py    +0 -4
python/sglang/srt/configs/mamba_utils.py

@@ -70,7 +70,7 @@ class Mamba2StateShape:
        # These are not TP-ed as they depend on A, dt_bias, D
        # - they are typically small
        # e.g., (h_heads, head_dim, state_size) = (128, 64, 128)
        # e.g., QWen3-Next: (32, 128, 128)
        temporal_state_shape = (
            divide(num_heads, tp_world_size),
            head_dim,
            state_size,
        )
        return Mamba2StateShape(
            conv=conv_state_shape,
...
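The hunk above touches the comment documenting how the temporal (SSM) state shape is derived: only the head axis is sharded across tensor-parallel ranks, while head_dim and state_size stay unsharded because they depend on A, dt_bias, and D. A minimal sketch of that arithmetic, using the Qwen3-Next example shape from the comment; the divide helper below is an illustrative stand-in for the project's divisibility-checked division, not its actual implementation:

# Sketch of the shape arithmetic documented in the comment above.
# `divide` stands in for a TP utility that checks even divisibility.
def divide(numerator: int, denominator: int) -> int:
    assert numerator % denominator == 0, "num_heads must divide evenly across TP ranks"
    return numerator // denominator

# Example from the comment: QWen3-Next uses (num_heads, head_dim, state_size) = (32, 128, 128).
num_heads, head_dim, state_size = 32, 128, 128
tp_world_size = 4  # hypothetical tensor-parallel world size

temporal_state_shape = (divide(num_heads, tp_world_size), head_dim, state_size)
print(temporal_state_shape)  # (8, 128, 128): only the head axis is split across ranks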
python/sglang/srt/configs/qwen3_next.py

@@ -27,12 +27,9 @@ from sglang.srt.layers.dp_attention import get_attention_tp_size
logger = logging.get_logger(__name__)


# NOTE: HybridLayerType
class HybridLayerType(enum.Enum):
    full_attention = "attention"
    swa_attention = "swa_attention"
    linear_attention = "linear_attention"
    mamba2 = "mamba"


class Qwen3NextConfig(PretrainedConfig):
...
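For readers skimming the diff, HybridLayerType simply maps descriptive member names to the layer-type strings used in hybrid-model configs. A small, hypothetical usage sketch; the layers_block_type list below is illustrative, not a field guaranteed by Qwen3NextConfig:

import enum

class HybridLayerType(enum.Enum):
    full_attention = "attention"
    swa_attention = "swa_attention"
    linear_attention = "linear_attention"
    mamba2 = "mamba"

# Hypothetical per-layer type strings, as a hybrid config might list them.
layers_block_type = ["linear_attention", "linear_attention", "attention", "linear_attention"]

# Enum lookup by value turns each string into a HybridLayerType member.
layer_types = [HybridLayerType(t) for t in layers_block_type]
print(layer_types[2])  # HybridLayerType.full_attention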
python/sglang/srt/models/falcon_h1.py

@@ -450,13 +450,6 @@ class FalconH1Model(nn.Module):
        return hidden_states


class HybridLayerType(enum.Enum):
    full_attention = "attention"
    swa_attention = "swa_attention"
    linear_attention = "linear_attention"
    mamba2 = "mamba"


class FalconH1ForCausalLM(nn.Module):
    fall_back_to_pt_during_load = False
...
python/sglang/test/test_deterministic.py

@@ -226,10 +226,6 @@ def send_prefix(args, batch_size: int, prompts: List[str]):
def test_deterministic(args):
    # First do some warmups
    for i in range(3):
        send_single(args, 16, args.profile)

    if args.test_mode == "single":
        # In single mode, we test the deterministic behavior by sending the same prompt in batch sizes ranging from 1 to n_trials.
        texts = []
...
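The comment in this hunk describes the single-mode check: the same prompt is sent at batch sizes ranging from 1 to n_trials and the returned texts are compared for exact equality. A rough sketch of that idea; generate below is a hypothetical stand-in for the script's send_single helper, not its real signature:

# Rough sketch of the single-mode determinism check described above.
# `generate(batch_size)` is a hypothetical callable that sends the same
# prompt at the given batch size and returns the first completion's text.
def check_single_mode_determinism(generate, n_trials: int) -> bool:
    texts = []
    for batch_size in range(1, n_trials + 1):
        texts.append(generate(batch_size))
    # Deterministic inference should produce identical text at every batch size.
    return len(set(texts)) == 1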