Unverified commit c01a1df5, authored by yilian49 and committed by GitHub
Browse files

[Bug] add flashinfer bool check for fusedmoe in Qwen moe models (#7723)

parent 00991723
......@@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
renormalize=config.norm_topk_prob,
quant_config=quant_config,
prefix=add_prefix("experts", prefix),
# Additional args for FusedMoE
**(
dict(
enable_flashinfer_moe=True,
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
)
if global_server_args_dict["enable_flashinfer_moe"]
else {}
),
)
self.gate = ReplicatedLinear(
......
......@@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
if global_server_args_dict["enable_deepep_moe"]
else {}
),
# Additional args for FusedMoE
**(
dict(
enable_flashinfer_moe=True,
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
)
if global_server_args_dict["enable_flashinfer_moe"]
else {}
),
)
self.gate = ReplicatedLinear(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment