Unverified commit 7200a21c, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix Assertion error DeepEP/csrc/kernels/intranode.cu:928: 'false and...


[Bug] Fix Assertion error DeepEP/csrc/kernels/intranode.cu:928: 'false and Unsupported type' (#26532)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 577c72a2
......@@ -336,7 +336,11 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
apply_router_weight_on_input=apply_router_weight_on_input,
)
dbo_yield_and_switch_from_compute_to_comm()
assert fused_expert_output.dtype == torch.bfloat16, (
f"Expected fused_expert_output bfloat16, got {fused_expert_output.dtype}"
)
combined_x, _, event = self.buffer.combine(
# HT combine only supports BF16
x=fused_expert_output,
handle=handle,
topk_weights=None,
......
......@@ -984,7 +984,7 @@ class FusedMoEModularKernel(torch.nn.Module):
assert num_chunks == 0
workspace13 = None
workspace2 = None
fused_out = torch.empty_like(a1q)
fused_out = torch.empty_like(a1q, dtype=in_dtype)
else:
assert num_chunks > 0
workspace13, workspace2, fused_out = self._allocate_buffers(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment