Unverified commit de4990a5, authored by Yuhao Yao and committed by GitHub
Browse files

[Bug] Fix w4afp8 moe kernel (#9392)

parent 029e0af3
@@ -1488,6 +1488,10 @@ struct CollectiveMmaArrayMixedInput<
template <class... TMs> template <class... TMs>
CUTLASS_DEVICE void CUTLASS_DEVICE void
tensormaps_cp_fence_release(TensorMapStorage& shared_tensormaps, cute::tuple<TMs...> const& input_tensormaps) { tensormaps_cp_fence_release(TensorMapStorage& shared_tensormaps, cute::tuple<TMs...> const& input_tensormaps) {
if (cute::elect_one_sync()) {
cute::tma_desc_commit_group();
cute::tma_desc_wait_group();
}
// Entire warp must do this (i.e. it's aligned) // Entire warp must do this (i.e. it's aligned)
tma_descriptor_cp_fence_release(get<0>(input_tensormaps), shared_tensormaps.smem_tensormap_A); tma_descriptor_cp_fence_release(get<0>(input_tensormaps), shared_tensormaps.smem_tensormap_A);
tma_descriptor_cp_fence_release(get<1>(input_tensormaps), shared_tensormaps.smem_tensormap_B); tma_descriptor_cp_fence_release(get<1>(input_tensormaps), shared_tensormaps.smem_tensormap_B);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment