Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4034c3d3
Unverified
Commit
4034c3d3
authored
Mar 02, 2026
by
Turner Jabbour
Committed by
GitHub
Mar 02, 2026
Browse files
[Core] Move test utility to test file (#35672)
Signed-off-by:
Turner Jabbour
<
doubleujabbour@gmail.com
>
parent
7560d674
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
24 deletions
+13
-24
tests/kernels/moe/test_gpt_oss_triton_kernels.py
tests/kernels/moe/test_gpt_oss_triton_kernels.py
+2
-1
tests/kernels/moe/test_modular_oai_triton_moe.py
tests/kernels/moe/test_modular_oai_triton_moe.py
+1
-2
tests/kernels/moe/utils.py
tests/kernels/moe/utils.py
+10
-0
vllm/model_executor/layers/utils.py
vllm/model_executor/layers/utils.py
+0
-21
No files found.
tests/kernels/moe/test_gpt_oss_triton_kernels.py
View file @
4034c3d3
...
@@ -26,9 +26,10 @@ from vllm.model_executor.layers.fused_moe.config import mxfp4_w4a16_moe_quant_co
...
@@ -26,9 +26,10 @@ from vllm.model_executor.layers.fused_moe.config import mxfp4_w4a16_moe_quant_co
from
vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe
import
(
from
vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe
import
(
triton_kernel_moe_forward
,
triton_kernel_moe_forward
,
)
)
from
vllm.model_executor.layers.utils
import
shuffle_weight
from
vllm.utils.math_utils
import
round_up
from
vllm.utils.math_utils
import
round_up
from
.utils
import
shuffle_weight
def
deshuffle
(
w
:
torch
.
Tensor
):
def
deshuffle
(
w
:
torch
.
Tensor
):
first
=
w
[...,
::
2
]
first
=
w
[...,
::
2
]
...
...
tests/kernels/moe/test_modular_oai_triton_moe.py
View file @
4034c3d3
...
@@ -33,11 +33,10 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularK
...
@@ -33,11 +33,10 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularK
from
vllm.model_executor.layers.fused_moe.prepare_finalize
import
(
from
vllm.model_executor.layers.fused_moe.prepare_finalize
import
(
MoEPrepareAndFinalizeNoEP
,
MoEPrepareAndFinalizeNoEP
,
)
)
from
vllm.model_executor.layers.utils
import
shuffle_weight
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.utils.torch_utils
import
set_random_seed
from
vllm.utils.torch_utils
import
set_random_seed
from
.utils
import
make_dummy_moe_config
from
.utils
import
make_dummy_moe_config
,
shuffle_weight
MNK
=
[
MNK
=
[
(
1
,
512
,
384
),
(
1
,
512
,
384
),
...
...
tests/kernels/moe/utils.py
View file @
4034c3d3
...
@@ -33,6 +33,16 @@ from vllm.utils.deep_gemm import per_block_cast_to_fp8
...
@@ -33,6 +33,16 @@ from vllm.utils.deep_gemm import per_block_cast_to_fp8
from
vllm.utils.math_utils
import
round_up
from
vllm.utils.math_utils
import
round_up
def shuffle_weight(w: torch.Tensor) -> torch.Tensor:
    """Fold weights to adjacent locations for Triton MoE / SwiGLU kernel layout.

    The last dimension is cut into a first half and a second half, and the
    two halves are interleaved element by element, so that ``w[..., i]`` and
    ``w[..., n // 2 + i]`` become neighbours in the result.

    Example:
        input:
            [[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12]]
        output:
            [[1, 4, 2, 5, 3, 6],
             [7, 10, 8, 11, 9, 12]]

    Args:
        w: Tensor to shuffle along its last dimension. The last dimension is
            expected to be even; with an odd size the two halves differ in
            length and cannot be stacked.

    Returns:
        A tensor with the same shape as ``w`` holding the interleaved values.
    """
    shape = w.shape
    n = shape[-1]
    first = w[..., : n // 2]
    second = w[..., n // 2 :]
    # Stacking on a new trailing axis pairs element i of the first half with
    # element i of the second half; reshaping back to the original shape
    # then lays those pairs out contiguously along the last dimension.
    stacked = torch.stack((first, second), dim=-1)
    return stacked.reshape(shape)
def
make_dummy_moe_config
(
def
make_dummy_moe_config
(
num_experts
:
int
=
1
,
num_experts
:
int
=
1
,
experts_per_token
:
int
=
1
,
experts_per_token
:
int
=
1
,
...
...
vllm/model_executor/layers/utils.py
View file @
4034c3d3
...
@@ -31,27 +31,6 @@ def is_layer_moe_router_gate(prefix: str) -> bool:
...
@@ -31,27 +31,6 @@ def is_layer_moe_router_gate(prefix: str) -> bool:
return
prefix
.
rsplit
(
"."
,
1
)[
-
1
]
in
MOE_LAYER_ROUTER_GATE_SUFFIXES
return
prefix
.
rsplit
(
"."
,
1
)[
-
1
]
in
MOE_LAYER_ROUTER_GATE_SUFFIXES
def shuffle_weight(w: torch.Tensor) -> torch.Tensor:
    """Interleave the two halves of the last dimension of ``w``.

    The weights are shuffled along the last dimension so that element ``i``
    of the first half and element ``i`` of the second half are folded into
    adjacent locations. This is used together with the Triton SwiGLU kernel.

    Example:
        input:
            [[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12]]
        output:
            [[1, 4, 2, 5, 3, 6],
             [7, 10, 8, 11, 9, 12]]

    Args:
        w: Tensor to shuffle. Its last dimension is expected to be even,
            since the two halves must have equal length to be stacked.

    Returns:
        The shuffled tensor, with the same shape as ``w``.
    """
    shape = w.shape
    n = shape[-1]
    half = n // 2
    first = w[..., :half]
    second = w[..., half:]
    # A new trailing axis of size 2 holds the (first[i], second[i]) pairs;
    # flattening it back into the original shape makes each pair adjacent.
    stacked = torch.stack((first, second), dim=-1)
    return stacked.reshape(shape)
def
get_token_bin_counts_and_mask
(
def
get_token_bin_counts_and_mask
(
tokens
:
torch
.
Tensor
,
tokens
:
torch
.
Tensor
,
vocab_size
:
int
,
vocab_size
:
int
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment