Commit 64b5f20f authored by danyao12's avatar danyao12
Browse files

Bwd Qloop_PT2&Split K in LDS/V in Vgpr

parent e296ee56
...@@ -15,9 +15,11 @@ add_example_executable(example_grouped_multihead_attention_forward_v2 grouped_mu ...@@ -15,9 +15,11 @@ add_example_executable(example_grouped_multihead_attention_forward_v2 grouped_mu
add_example_executable(example_batched_multihead_attention_forward_v2 batched_multihead_attention_forward_v2.cpp) add_example_executable(example_batched_multihead_attention_forward_v2 batched_multihead_attention_forward_v2.cpp)
add_example_executable(example_grouped_multihead_attention_backward_v2 grouped_multihead_attention_backward_v2.cpp) add_example_executable(example_grouped_multihead_attention_backward_v2 grouped_multihead_attention_backward_v2.cpp)
add_example_executable(example_batched_multihead_attention_backward_v2 batched_multihead_attention_backward_v2.cpp) add_example_executable(example_batched_multihead_attention_backward_v2 batched_multihead_attention_backward_v2.cpp)
add_example_executable(example_batched_multihead_attention_backward_v2_protro batched_multihead_attention_backward_v2_protro.cpp)
add_example_executable(example_grouped_multihead_attention_train_v2 grouped_multihead_attention_train_v2.cpp) add_example_executable(example_grouped_multihead_attention_train_v2 grouped_multihead_attention_train_v2.cpp)
add_example_executable(example_batched_multihead_attention_train_v2 batched_multihead_attention_train_v2.cpp) add_example_executable(example_batched_multihead_attention_train_v2 batched_multihead_attention_train_v2.cpp)
add_example_executable(example_batched_multihead_attention_backward_v3 batched_multihead_attention_backward_v3.cpp) add_example_executable(example_batched_multihead_attention_backward_v3 batched_multihead_attention_backward_v3.cpp)
add_example_executable(example_batched_multihead_attention_backward_v3_protro batched_multihead_attention_backward_v3_protro.cpp)
add_example_executable(example_grouped_multihead_attention_backward_v3 grouped_multihead_attention_backward_v3.cpp) add_example_executable(example_grouped_multihead_attention_backward_v3 grouped_multihead_attention_backward_v3.cpp)
add_custom_target(example_gemm_scale_softmax_gemm) add_custom_target(example_gemm_scale_softmax_gemm)
......
...@@ -879,6 +879,18 @@ struct BlockwiseGemmXdlops_v2 ...@@ -879,6 +879,18 @@ struct BlockwiseGemmXdlops_v2
b_thread_copy_.SetSrcCoord(b_origin); b_thread_copy_.SetSrcCoord(b_origin);
} }
template <typename SrcSliceMoveStepIdx>
// Advances this thread's read window into the A block by the given multi-index step.
// Forwards to a_thread_copy_.MoveSrcSliceWindow over the A block descriptor
// (a_block_desc_m0_m1_m2_k); both members are declared elsewhere in this struct.
// NOTE(review): presumably used to step through K (or split-K) sub-tiles between
// MFMA iterations without recomputing the full source coordinate — confirm against callers.
__device__ void MoveABlockSrcSliceWindow(const SrcSliceMoveStepIdx& src_slice_move_step_idx)
{
    a_thread_copy_.MoveSrcSliceWindow(a_block_desc_m0_m1_m2_k, src_slice_move_step_idx);
}
template <typename SrcSliceMoveStepIdx>
// Advances this thread's read window into the B block by the given multi-index step.
// Forwards to b_thread_copy_.MoveSrcSliceWindow over the B block descriptor
// (b_block_desc_n0_n1_n2_k); both members are declared elsewhere in this struct.
// Mirrors MoveABlockSrcSliceWindow for the B operand.
__device__ void MoveBBlockSrcSliceWindow(const SrcSliceMoveStepIdx& src_slice_move_step_idx)
{
    b_thread_copy_.MoveSrcSliceWindow(b_block_desc_n0_n1_n2_k, src_slice_move_step_idx);
}
// transposed XDL output supporting C_xdl' = B_xdl' * A_xdl' // transposed XDL output supporting C_xdl' = B_xdl' * A_xdl'
__host__ __device__ static constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4() __host__ __device__ static constexpr auto GetCThreadDescriptor_M0_N0_M1_N1_M2_N2_N3_N4()
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment