Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
59d7ffc1
Unverified
Commit
59d7ffc1
authored
Sep 13, 2025
by
Michael Goin
Committed by
GitHub
Sep 13, 2025
Browse files
[CI Failure] Fix test_flashinfer_cutlass_mxfp4_mxfp8_fused_moe (#24750)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
1da0f144
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
3 additions
and
2 deletions
+3
-2
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
+1
-0
tests/kernels/moe/test_mxfp4_moe.py
tests/kernels/moe/test_mxfp4_moe.py
+2
-2
No files found.
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
View file @
59d7ffc1
...
@@ -43,6 +43,7 @@ void sm100_cutlass_mla_decode(
...
@@ -43,6 +43,7 @@ void sm100_cutlass_mla_decode(
torch
::
Tensor
const
&
seq_lens
,
torch
::
Tensor
const
&
seq_lens
,
torch
::
Tensor
const
&
page_table
,
torch
::
Tensor
const
&
page_table
,
torch
::
Tensor
const
&
workspace
,
torch
::
Tensor
const
&
workspace
,
double
sm_scale
,
int64_t
num_kv_splits
)
{
int64_t
num_kv_splits
)
{
TORCH_CHECK
(
false
,
"CUDA version must be >= 12.4 for cutlass_mla_decode"
);
TORCH_CHECK
(
false
,
"CUDA version must be >= 12.4 for cutlass_mla_decode"
);
}
}
...
...
tests/kernels/moe/test_mxfp4_moe.py
View file @
59d7ffc1
...
@@ -771,11 +771,11 @@ def test_flashinfer_cutlass_mxfp4_mxfp8_fused_moe(
...
@@ -771,11 +771,11 @@ def test_flashinfer_cutlass_mxfp4_mxfp8_fused_moe(
w13_ref
=
dequant_mxfp4_batches
(
w13_ref
=
dequant_mxfp4_batches
(
w13_q
.
view
(
torch
.
uint8
),
w13_q
.
view
(
torch
.
uint8
),
w13_scale
.
view
(
torch
.
uint8
).
reshape
(
-
1
)).
to
(
torch
.
float32
).
reshape
(
w13_scale
.
view
(
torch
.
uint8
).
reshape
(
-
1
)).
to
(
torch
.
float32
).
reshape
(
num_experts
,
2
*
intermediate_size
,
hidden_size
)
num_experts
,
2
*
intermediate_size
,
hidden_size
)
.
to
(
device
)
w2_ref
=
dequant_mxfp4_batches
(
w2_ref
=
dequant_mxfp4_batches
(
w2_q
.
view
(
torch
.
uint8
),
w2_q
.
view
(
torch
.
uint8
),
w2_scale
.
view
(
torch
.
uint8
).
reshape
(
-
1
)).
to
(
torch
.
float32
).
reshape
(
w2_scale
.
view
(
torch
.
uint8
).
reshape
(
-
1
)).
to
(
torch
.
float32
).
reshape
(
num_experts
,
hidden_size
,
intermediate_size
)
num_experts
,
hidden_size
,
intermediate_size
)
.
to
(
device
)
# Quantize activations for SM100 path and dequantize for reference
# Quantize activations for SM100 path and dequantize for reference
hidden_states_q
,
hidden_states_sf
=
mxfp8_quantize
(
hidden_states
,
True
,
32
)
hidden_states_q
,
hidden_states_sf
=
mxfp8_quantize
(
hidden_states
,
True
,
32
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment