Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0be9516e
Unverified
Commit
0be9516e
authored
Apr 07, 2026
by
Wei Zhao
Committed by
GitHub
Apr 07, 2026
Browse files
[Bug] Fix Trtllm Fp8 MoE Weight Shuffle Memory Fragmentation (#39054)
Signed-off-by:
wzhao18
<
wzhao18.sz@gmail.com
>
parent
7b9de7c8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
9 deletions
+12
-9
vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
...el_executor/layers/quantization/utils/flashinfer_utils.py
+12
-9
No files found.
vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
View file @
0be9516e
...
...
@@ -322,20 +322,23 @@ def _shuffle_deepseek_fp8_moe_weights(
block_k
=
128
num_experts
=
w13
.
shape
[
0
]
w13_shuffled
:
list
[
torch
.
Tensor
]
=
[]
w2_shuffled
:
list
[
torch
.
Tensor
]
=
[]
M13
,
K13
=
w13
.
shape
[
1
],
w13
.
shape
[
2
]
M2
,
K2
=
w2
.
shape
[
1
],
w2
.
shape
[
2
]
w13_out
=
torch
.
empty
(
num_experts
,
K13
//
block_k
,
M13
,
block_k
,
dtype
=
torch
.
uint8
,
device
=
w13
.
device
)
w2_out
=
torch
.
empty
(
num_experts
,
K2
//
block_k
,
M2
,
block_k
,
dtype
=
torch
.
uint8
,
device
=
w2
.
device
)
for
i
in
range
(
num_experts
):
t13
=
shuffle_matrix_a
(
w13
[
i
].
view
(
torch
.
uint8
),
epilogue_tile_m
)
t13
=
convert_to_block_layout
(
t13
,
block_k
)
w13_shuffled
.
append
(
t13
)
w13_out
[
i
]
=
convert_to_block_layout
(
t13
,
block_k
)
t2
=
shuffle_matrix_a
(
w2
[
i
].
view
(
torch
.
uint8
),
epilogue_tile_m
)
t2
=
convert_to_block_layout
(
t2
,
block_k
)
w2_shuffled
.
append
(
t2
)
w2_out
[
i
]
=
convert_to_block_layout
(
t2
,
block_k
)
w13_out
=
torch
.
stack
(
w13_shuffled
).
view
(
torch
.
float8_e4m3fn
)
w2_out
=
torch
.
stack
(
w2_shuffled
).
view
(
torch
.
float8_e4m3fn
)
return
w13_out
,
w2_out
return
w13_out
.
view
(
torch
.
float8_e4m3fn
),
w2_out
.
view
(
torch
.
float8_e4m3fn
)
def
_shuffle_mxfp8_moe_weights
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment