Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
83262dcb
"docs/vscode:/vscode.git/clone" did not exist on "fd3d5502d4ab458332d110c59370379a7482b4d5"
Unverified
Commit
83262dcb
authored
Aug 12, 2025
by
Jianwei Dong
Committed by
GitHub
Aug 11, 2025
Browse files
Fix mismatch between padded_scales shape and reshape dimensions in modelopt quantization (#8766)
parent
c46c75f8
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
4 deletions
+4
-4
python/sglang/srt/layers/quantization/modelopt_quant.py
python/sglang/srt/layers/quantization/modelopt_quant.py
+4
-4
No files found.
python/sglang/srt/layers/quantization/modelopt_quant.py
View file @
83262dcb
...
@@ -677,9 +677,9 @@ class ModelOptFp4LinearMethod(LinearMethodBase):
...
@@ -677,9 +677,9 @@ class ModelOptFp4LinearMethod(LinearMethodBase):
padded_scales
=
padded_scales
.
permute
((
0
,
1
,
4
,
3
,
2
,
5
))
padded_scales
=
padded_scales
.
permute
((
0
,
1
,
4
,
3
,
2
,
5
))
padded_scales
=
padded_scales
.
contiguous
().
cuda
()
padded_scales
=
padded_scales
.
contiguous
().
cuda
()
padded_scales
=
(
padded_scales
=
(
padded_scales
.
reshape
(
M
,
K
)
padded_scales
.
reshape
(
M
_padded
,
K_padded
)
if
scale_ndim
==
2
if
scale_ndim
==
2
else
padded_scales
.
reshape
(
B
,
M
,
K
)
else
padded_scales
.
reshape
(
B
,
M
_padded
,
K_padded
)
)
)
layer
.
weight_scale_interleaved
=
Parameter
(
padded_scales
,
requires_grad
=
False
)
layer
.
weight_scale_interleaved
=
Parameter
(
padded_scales
,
requires_grad
=
False
)
...
@@ -878,9 +878,9 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
...
@@ -878,9 +878,9 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
swizzled_scale
=
padded_scale
.
permute
((
0
,
1
,
4
,
3
,
2
,
5
))
swizzled_scale
=
padded_scale
.
permute
((
0
,
1
,
4
,
3
,
2
,
5
))
swizzled_scale
=
swizzled_scale
.
contiguous
().
cuda
()
swizzled_scale
=
swizzled_scale
.
contiguous
().
cuda
()
return
(
return
(
swizzled_scale
.
reshape
(
M
,
K
)
swizzled_scale
.
reshape
(
M
_padded
,
K_padded
)
if
scale_ndim
==
2
if
scale_ndim
==
2
else
swizzled_scale
.
reshape
(
B
,
M
,
K
)
else
swizzled_scale
.
reshape
(
B
,
M
_padded
,
K_padded
)
)
)
def
prepare_static_weights_for_kernel
(
def
prepare_static_weights_for_kernel
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment