"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "279f744ce560818544a018b21c126ce18dc41d86"
Unverified Commit c01a1df5 authored by yilian49's avatar yilian49 Committed by GitHub
Browse files

[Bug] add flashinfer bool check for fusedmoe in Qwen moe models (#7723)

parent 00991723
...@@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module): ...@@ -143,6 +143,15 @@ class Qwen2MoeSparseMoeBlock(nn.Module):
renormalize=config.norm_topk_prob, renormalize=config.norm_topk_prob,
quant_config=quant_config, quant_config=quant_config,
prefix=add_prefix("experts", prefix), prefix=add_prefix("experts", prefix),
# Additional args for FusedMoE
**(
dict(
enable_flashinfer_moe=True,
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
)
if global_server_args_dict["enable_flashinfer_moe"]
else {}
),
) )
self.gate = ReplicatedLinear( self.gate = ReplicatedLinear(
......
...@@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module): ...@@ -117,6 +117,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module):
if global_server_args_dict["enable_deepep_moe"] if global_server_args_dict["enable_deepep_moe"]
else {} else {}
), ),
# Additional args for FusedMoE
**(
dict(
enable_flashinfer_moe=True,
enable_ep_moe=global_server_args_dict["enable_ep_moe"],
)
if global_server_args_dict["enable_flashinfer_moe"]
else {}
),
) )
self.gate = ReplicatedLinear( self.gate = ReplicatedLinear(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment