Commit 3f032f25 authored by Anthony Chang
Browse files

Make M01 a compile-time template parameter instead of a constructor argument

parent 132cd364
......@@ -107,7 +107,7 @@ struct BlockToCTileMap_M00_N0_M01
// Rows of column-vectors
// This C-tile map dynamically adjusts M01 when C-tile index is out of range
template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N>
template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N, index_t M01 = 1>
struct BlockToCTileMap_M00_N0_M01Adapt
{
static constexpr auto I0 = Number<0>{};
......@@ -117,8 +117,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt
__host__ __device__ BlockToCTileMap_M00_N0_M01Adapt() = default;
__host__ __device__ BlockToCTileMap_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n,
index_t M01 = 1)
__host__ __device__ BlockToCTileMap_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n)
: M01_(M01), c_grid_desc_m_n_(c_grid_desc_m_n)
{
}
......@@ -172,7 +171,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt
// 2D slices of column-vectors in 3D space
// This C-tile map dynamically adjusts M01 when C-tile index is out of range
template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N>
template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N, index_t M01 = 1>
struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
{
static constexpr auto I0 = Number<0>{};
......@@ -183,7 +182,6 @@ struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
__host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt() = default;
__host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n,
index_t M01 = 8,
index_t KSplit = 1)
: M01_(M01), KSplit_(KSplit), c_grid_desc_m_n_(c_grid_desc_m_n)
{
......
......@@ -268,7 +268,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4
const CMNGridDesc& c_m_n_grid_desc, index_t /* M01 */, index_t /* N01 */, index_t KBatch)
{
return BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, CMNGridDesc>(
c_m_n_grid_desc, 1, KBatch);
c_m_n_grid_desc, KBatch);
}
using CM0N0M1N1M2M3M4N2GridDesc = decltype(MakeCM0N0M1N1M2M3M4N2GridDescriptor(CMNGridDesc{}));
......
......@@ -242,7 +242,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
const CMNGridDesc& c_m_n_grid_desc, index_t /* M01 */, index_t /* N01 */, index_t KBatch)
{
return BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, CMNGridDesc>(
c_m_n_grid_desc, 1, KBatch);
c_m_n_grid_desc, KBatch);
}
__host__ __device__ static constexpr auto
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment