Commit fa5a501f authored by Anthony Chang
Browse files

format

parent 33beec37
...@@ -15,7 +15,8 @@ namespace tensor_operation { ...@@ -15,7 +15,8 @@ namespace tensor_operation {
namespace device { namespace device {
// Note: inter-wave loop scheduler is rolled out to c-shuffle version first. Because non c-shuffle // Note: inter-wave loop scheduler is rolled out to c-shuffle version first. Because non c-shuffle
// version currently has compiler issues with register spill which further causes validation failures. // version currently has compiler issues with register spill which further causes validation
// failures.
template <typename ALayout, template <typename ALayout,
typename BLayout, typename BLayout,
typename CLayout, typename CLayout,
......
...@@ -15,7 +15,8 @@ namespace tensor_operation { ...@@ -15,7 +15,8 @@ namespace tensor_operation {
namespace device { namespace device {
// Note: inter-wave loop scheduler is rolled out to c-shuffle version first. Because non c-shuffle // Note: inter-wave loop scheduler is rolled out to c-shuffle version first. Because non c-shuffle
// version currently has compiler issues with register spill which further causes validation failures. // version currently has compiler issues with register spill which further causes validation
// failures.
template <typename ALayout, template <typename ALayout,
typename BLayout, typename BLayout,
typename CLayout, typename CLayout,
......
...@@ -276,22 +276,21 @@ struct GridwiseGemmPipelineInterwave_v1<1> ...@@ -276,22 +276,21 @@ struct GridwiseGemmPipelineInterwave_v1<1>
typename BBlockTransferStep, typename BBlockTransferStep,
typename BlockwiseGemm, typename BlockwiseGemm,
typename CThreadBuffer> typename CThreadBuffer>
static __device__ void static __device__ void Run(const AGridDesc& a_grid_desc,
Run(const AGridDesc& a_grid_desc, const ABlockDesc& a_block_desc,
const ABlockDesc& a_block_desc, ABlockTransfer& a_blockwise_copy,
ABlockTransfer& a_blockwise_copy, const AGridBuffer& a_grid_buf,
const AGridBuffer& a_grid_buf, ABlockBuffer& a_block_buf,
ABlockBuffer& a_block_buf, const ABlockTransferStep& a_block_copy_step,
const ABlockTransferStep& a_block_copy_step, const BGridDesc& b_grid_desc,
const BGridDesc& b_grid_desc, const BBlockDesc& b_block_desc,
const BBlockDesc& b_block_desc, BBlockTransfer& b_blockwise_copy,
BBlockTransfer& b_blockwise_copy, const BGridBuffer& b_grid_buf,
const BGridBuffer& b_grid_buf, BBlockBuffer& b_block_buf,
BBlockBuffer& b_block_buf, const BBlockTransferStep& b_block_copy_step,
const BBlockTransferStep& b_block_copy_step, const BlockwiseGemm& blockwise_gemm,
const BlockwiseGemm& blockwise_gemm, CThreadBuffer& c_thread_buf,
CThreadBuffer& c_thread_buf, index_t num_loop)
index_t num_loop)
{ {
// preload data into LDS // preload data into LDS
a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf); a_blockwise_copy.RunRead(a_grid_desc, a_grid_buf);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment