print(f"####################combine x shape:{fused_expert_output.shape} x dtype:{fused_expert_output.dtype}, config:{self._get_combine_config()}, do_async:{do_async}")
combined_x,_,event=self.buffer.combine(
combined_x,_,event=self.buffer.combine(
# HT combine only supports BF16
# HT combine only supports BF16
x=fused_expert_output,
x=fused_expert_output,
...
@@ -356,6 +359,9 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
...
@@ -356,6 +359,9 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):