Commit 3f032f25 authored by Anthony Chang
Browse files

compile time M01

parent 132cd364
...@@ -107,7 +107,7 @@ struct BlockToCTileMap_M00_N0_M01 ...@@ -107,7 +107,7 @@ struct BlockToCTileMap_M00_N0_M01
// Rows of column-vectors // Rows of column-vectors
// This C-tile map dynamically adjusts M01 when C-tile index is out of range // This C-tile map dynamically adjusts M01 when C-tile index is out of range
template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N> template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N, index_t M01 = 1>
struct BlockToCTileMap_M00_N0_M01Adapt struct BlockToCTileMap_M00_N0_M01Adapt
{ {
static constexpr auto I0 = Number<0>{}; static constexpr auto I0 = Number<0>{};
...@@ -117,8 +117,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt ...@@ -117,8 +117,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt
__host__ __device__ BlockToCTileMap_M00_N0_M01Adapt() = default; __host__ __device__ BlockToCTileMap_M00_N0_M01Adapt() = default;
__host__ __device__ BlockToCTileMap_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n, __host__ __device__ BlockToCTileMap_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n)
index_t M01 = 1)
: M01_(M01), c_grid_desc_m_n_(c_grid_desc_m_n) : M01_(M01), c_grid_desc_m_n_(c_grid_desc_m_n)
{ {
} }
...@@ -172,7 +171,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt ...@@ -172,7 +171,7 @@ struct BlockToCTileMap_M00_N0_M01Adapt
// 2D slices of column-vectors in 3D space // 2D slices of column-vectors in 3D space
// This C-tile map dynamically adjusts M01 when C-tile index is out of range // This C-tile map dynamically adjusts M01 when C-tile index is out of range
template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N> template <index_t MPerBlock, index_t NPerBlock, typename CGridDesc_M_N, index_t M01 = 1>
struct BlockToCTileMap_KSplit_M00_N0_M01Adapt struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
{ {
static constexpr auto I0 = Number<0>{}; static constexpr auto I0 = Number<0>{};
...@@ -183,7 +182,6 @@ struct BlockToCTileMap_KSplit_M00_N0_M01Adapt ...@@ -183,7 +182,6 @@ struct BlockToCTileMap_KSplit_M00_N0_M01Adapt
__host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt() = default; __host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt() = default;
__host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n, __host__ __device__ BlockToCTileMap_KSplit_M00_N0_M01Adapt(const CGridDesc_M_N& c_grid_desc_m_n,
index_t M01 = 8,
index_t KSplit = 1) index_t KSplit = 1)
: M01_(M01), KSplit_(KSplit), c_grid_desc_m_n_(c_grid_desc_m_n) : M01_(M01), KSplit_(KSplit), c_grid_desc_m_n_(c_grid_desc_m_n)
{ {
......
...@@ -268,7 +268,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4 ...@@ -268,7 +268,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4
const CMNGridDesc& c_m_n_grid_desc, index_t /* M01 */, index_t /* N01 */, index_t KBatch) const CMNGridDesc& c_m_n_grid_desc, index_t /* M01 */, index_t /* N01 */, index_t KBatch)
{ {
return BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, CMNGridDesc>( return BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, CMNGridDesc>(
c_m_n_grid_desc, 1, KBatch); c_m_n_grid_desc, KBatch);
} }
using CM0N0M1N1M2M3M4N2GridDesc = decltype(MakeCM0N0M1N1M2M3M4N2GridDescriptor(CMNGridDesc{})); using CM0N0M1N1M2M3M4N2GridDesc = decltype(MakeCM0N0M1N1M2M3M4N2GridDescriptor(CMNGridDesc{}));
......
...@@ -242,7 +242,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2 ...@@ -242,7 +242,7 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2
const CMNGridDesc& c_m_n_grid_desc, index_t /* M01 */, index_t /* N01 */, index_t KBatch) const CMNGridDesc& c_m_n_grid_desc, index_t /* M01 */, index_t /* N01 */, index_t KBatch)
{ {
return BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, CMNGridDesc>( return BlockToCTileMap_KSplit_M00_N0_M01Adapt<MPerBlock, NPerBlock, CMNGridDesc>(
c_m_n_grid_desc, 1, KBatch); c_m_n_grid_desc, KBatch);
} }
__host__ __device__ static constexpr auto __host__ __device__ static constexpr auto
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment