Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
327a02d8
Unverified
Commit
327a02d8
authored
Jan 18, 2026
by
bnellnm
Committed by
GitHub
Jan 18, 2026
Browse files
[MoE Refactor] Separate Router into OO Classes (#30623)
Signed-off-by:
Bill Nell
<
bnell@redhat.com
>
parent
2f03035a
Changes
45
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
9 additions
and
6 deletions
+9
-6
vllm/model_executor/layers/quantization/mxfp4.py
vllm/model_executor/layers/quantization/mxfp4.py
+4
-4
vllm/model_executor/layers/quantization/quark/quark_moe.py
vllm/model_executor/layers/quantization/quark/quark_moe.py
+1
-1
vllm/model_executor/layers/quantization/rtn.py
vllm/model_executor/layers/quantization/rtn.py
+1
-1
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_moe.py
+1
-0
vllm/model_executor/models/ernie45_vl_moe.py
vllm/model_executor/models/ernie45_vl_moe.py
+2
-0
No files found.
vllm/model_executor/layers/quantization/mxfp4.py
View file @
327a02d8
...
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe import (
...
@@ -14,6 +14,7 @@ from vllm.model_executor.layers.fused_moe import (
FusedMoE
,
FusedMoE
,
FusedMoEConfig
,
FusedMoEConfig
,
FusedMoEMethodBase
,
FusedMoEMethodBase
,
FusedMoERouter
,
)
)
from
vllm.model_executor.layers.fused_moe
import
modular_kernel
as
mk
from
vllm.model_executor.layers.fused_moe
import
modular_kernel
as
mk
from
vllm.model_executor.layers.fused_moe.config
import
(
from
vllm.model_executor.layers.fused_moe.config
import
(
...
@@ -27,7 +28,6 @@ from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
...
@@ -27,7 +28,6 @@ from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
MarlinExperts
,
MarlinExperts
,
fused_marlin_moe
,
fused_marlin_moe
,
)
)
from
vllm.model_executor.layers.fused_moe.fused_moe_router
import
FusedMoERouter
from
vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe
import
(
from
vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe
import
(
OAITritonExperts
,
OAITritonExperts
,
UnfusedOAITritonExperts
,
UnfusedOAITritonExperts
,
...
@@ -936,9 +936,9 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
...
@@ -936,9 +936,9 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
layer
.
apply_router_weight_on_input
,
layer
.
apply_router_weight_on_input
,
layer
.
scoring_func
,
layer
.
scoring_func
,
layer
.
activation
,
layer
.
activation
,
layer
.
expert_load_view
,
layer
.
eplb_state
.
expert_load_view
,
layer
.
logical_to_physical_map
,
layer
.
eplb_state
.
logical_to_physical_map
,
layer
.
logical_replica_count
,
layer
.
eplb_state
.
logical_replica_count
,
),
"MXFP4 are not supported with this configuration."
),
"MXFP4 are not supported with this configuration."
if
(
if
(
...
...
vllm/model_executor/layers/quantization/quark/quark_moe.py
View file @
327a02d8
...
@@ -548,7 +548,7 @@ class QuarkW4A8Fp8MoEMethod(QuarkMoEMethod):
...
@@ -548,7 +548,7 @@ class QuarkW4A8Fp8MoEMethod(QuarkMoEMethod):
x
:
torch
.
Tensor
,
x
:
torch
.
Tensor
,
router_logits
:
torch
.
Tensor
,
router_logits
:
torch
.
Tensor
,
)
->
torch
.
Tensor
|
tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
)
->
torch
.
Tensor
|
tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
topk_weights
,
topk_ids
=
lay
er
.
select_experts
(
topk_weights
,
topk_ids
=
rout
er
.
select_experts
(
hidden_states
=
x
,
hidden_states
=
x
,
router_logits
=
router_logits
,
router_logits
=
router_logits
,
)
)
...
...
vllm/model_executor/layers/quantization/rtn.py
View file @
327a02d8
...
@@ -10,12 +10,12 @@ import torch
...
@@ -10,12 +10,12 @@ import torch
from
torch.nn.parameter
import
Parameter
from
torch.nn.parameter
import
Parameter
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.fused_moe
import
FusedMoERouter
from
vllm.model_executor.layers.fused_moe.config
import
(
from
vllm.model_executor.layers.fused_moe.config
import
(
FusedMoEConfig
,
FusedMoEConfig
,
FusedMoEQuantConfig
,
FusedMoEQuantConfig
,
)
)
from
vllm.model_executor.layers.fused_moe.fused_marlin_moe
import
fused_marlin_moe
from
vllm.model_executor.layers.fused_moe.fused_marlin_moe
import
fused_marlin_moe
from
vllm.model_executor.layers.fused_moe.fused_moe_router
import
FusedMoERouter
from
vllm.model_executor.layers.fused_moe.layer
import
(
from
vllm.model_executor.layers.fused_moe.layer
import
(
FusedMoE
,
FusedMoE
,
FusedMoEMethodBase
,
FusedMoEMethodBase
,
...
...
vllm/model_executor/models/ernie45_moe.py
View file @
327a02d8
...
@@ -201,6 +201,7 @@ class Ernie4_5_MoeMoE(nn.Module):
...
@@ -201,6 +201,7 @@ class Ernie4_5_MoeMoE(nn.Module):
e_score_correction_bias
=
self
.
gate
.
e_score_correction_bias
,
e_score_correction_bias
=
self
.
gate
.
e_score_correction_bias
,
enable_eplb
=
self
.
enable_eplb
,
enable_eplb
=
self
.
enable_eplb
,
num_redundant_experts
=
self
.
n_redundant_experts
,
num_redundant_experts
=
self
.
n_redundant_experts
,
router_logits_dtype
=
torch
.
float32
,
)
)
def
forward
(
self
,
hidden_states
:
torch
.
Tensor
)
->
torch
.
Tensor
:
def
forward
(
self
,
hidden_states
:
torch
.
Tensor
)
->
torch
.
Tensor
:
...
...
vllm/model_executor/models/ernie45_vl_moe.py
View file @
327a02d8
...
@@ -269,6 +269,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
...
@@ -269,6 +269,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
quant_config
=
quant_config
,
quant_config
=
quant_config
,
e_score_correction_bias
=
self
.
e_score_correction_bias
[
0
],
e_score_correction_bias
=
self
.
e_score_correction_bias
[
0
],
prefix
=
f
"
{
prefix
}
.text_experts"
,
prefix
=
f
"
{
prefix
}
.text_experts"
,
router_logits_dtype
=
torch
.
float32
,
)
)
else
:
else
:
self
.
text_experts
=
Ernie4_5_VLMoeMLP
(
self
.
text_experts
=
Ernie4_5_VLMoeMLP
(
...
@@ -306,6 +307,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
...
@@ -306,6 +307,7 @@ class Ernie4_5_VLMoeMoE(nn.Module):
quant_config
=
quant_config
,
quant_config
=
quant_config
,
e_score_correction_bias
=
self
.
e_score_correction_bias
[
1
],
e_score_correction_bias
=
self
.
e_score_correction_bias
[
1
],
prefix
=
f
"
{
prefix
}
.vision_experts"
,
prefix
=
f
"
{
prefix
}
.vision_experts"
,
router_logits_dtype
=
torch
.
float32
,
)
)
else
:
else
:
self
.
vision_experts
=
Ernie4_5_VLMoeMLP
(
self
.
vision_experts
=
Ernie4_5_VLMoeMLP
(
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment