Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b498cd21
"torchvision/csrc/vscode:/vscode.git/clone" did not exist on "512ea299d4b2d2bbac3498a75a2d8c0190cfcb39"
Unverified
Commit
b498cd21
authored
Aug 18, 2025
by
fzyzcjy
Committed by
GitHub
Aug 17, 2025
Browse files
Tiny make fp4 moe method parameters more static (#8520)
parent
0fc54b97
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
6 deletions
+11
-6
python/sglang/srt/layers/quantization/modelopt_quant.py
python/sglang/srt/layers/quantization/modelopt_quant.py
+11
-6
No files found.
python/sglang/srt/layers/quantization/modelopt_quant.py
View file @
b498cd21
...
...
@@ -812,6 +812,11 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
)
layer
.
register_parameter
(
"w13_weight_scale"
,
w13_weight_scale
)
# Call `swizzle_blockscale` here only to allocate a parameter of the right shape; the real swizzled values are written later via `.data.copy_`
layer
.
w13_blockscale_swizzled
=
Parameter
(
self
.
swizzle_blockscale
(
layer
.
w13_weight_scale
),
requires_grad
=
False
)
w2_weight_scale
=
ModelWeightParameter
(
data
=
torch
.
empty
(
layer
.
num_local_experts
,
...
...
@@ -826,6 +831,10 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
)
layer
.
register_parameter
(
"w2_weight_scale"
,
w2_weight_scale
)
layer
.
w2_blockscale_swizzled
=
Parameter
(
self
.
swizzle_blockscale
(
layer
.
w2_weight_scale
),
requires_grad
=
False
)
from
sglang.srt.layers.moe.fused_moe_triton
import
FusedMoeWeightScaleSupported
extra_weight_attrs
.
update
(
...
...
@@ -1129,16 +1138,12 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
# Process w13 weights
w13_blockscale_swizzled
=
self
.
swizzle_blockscale
(
layer
.
w13_weight_scale
)
layer
.
w13_blockscale_swizzled
=
Parameter
(
w13_blockscale_swizzled
,
requires_grad
=
False
)
layer
.
w13_blockscale_swizzled
.
data
.
copy_
(
w13_blockscale_swizzled
)
layer
.
w13_weight
=
Parameter
(
layer
.
w13_weight
.
data
,
requires_grad
=
False
)
# Process w2 weights
w2_blockscale_swizzled
=
self
.
swizzle_blockscale
(
layer
.
w2_weight_scale
)
layer
.
w2_blockscale_swizzled
=
Parameter
(
w2_blockscale_swizzled
,
requires_grad
=
False
)
layer
.
w2_blockscale_swizzled
.
data
.
copy_
(
w2_blockscale_swizzled
)
layer
.
w2_weight
=
Parameter
(
layer
.
w2_weight
.
data
,
requires_grad
=
False
)
# Both flashinfer cutlass and regular cutlass use same processing for w2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment