Commit 9d78a6c5 authored by danyao12
Browse files

comments

parent 42a7240a
...@@ -750,7 +750,7 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR ...@@ -750,7 +750,7 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR
HotLoopScheduler::template GemmStagedScheduler<3>(); HotLoopScheduler::template GemmStagedScheduler<3>();
__builtin_amdgcn_sched_barrier(0); __builtin_amdgcn_sched_barrier(0);
// STAGE7 SGrad@K^T // STAGE7 SGrad@K^T Gemm4
auto dq_acc = QGradBlockTileType{}; auto dq_acc = QGradBlockTileType{};
clear_tile(dq_acc); clear_tile(dq_acc);
...@@ -806,6 +806,7 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR ...@@ -806,6 +806,7 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR
auto st_acc = SPTBlockTileType{}; auto st_acc = SPTBlockTileType{};
clear_tile(st_acc); clear_tile(st_acc);
// STAGE 1, Q@K Gemm0
gemm_0(st_acc, q_reg_tensor, k_reg_tensor); gemm_0(st_acc, q_reg_tensor, k_reg_tensor);
// STAGE 2, Scale, Add bias, Mask, Softmax, Dropout // STAGE 2, Scale, Add bias, Mask, Softmax, Dropout
...@@ -980,7 +981,6 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR ...@@ -980,7 +981,6 @@ struct BlockFmhaBwdDQDKDVPipelineKRKTRVR
HotLoopScheduler::template GemmStagedScheduler<3>(); HotLoopScheduler::template GemmStagedScheduler<3>();
// STAGE 7, SGrad@K^T Gemm4 // STAGE 7, SGrad@K^T Gemm4
auto dq_acc = QGradBlockTileType{}; auto dq_acc = QGradBlockTileType{};
clear_tile(dq_acc); clear_tile(dq_acc);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment