Commit 06eacec9 authored by Rosty Geyyer
Browse files

Update blockwise indexing, working version

parent 581d244c
...@@ -60,9 +60,9 @@ using DeviceConvBwdWeightInstance = ...@@ -60,9 +60,9 @@ using DeviceConvBwdWeightInstance =
S<1, 2, 1, 128, 1>, // ABlockTransferThreadClusterLengths_K0_M0_M1_K1 S<1, 2, 1, 128, 1>, // ABlockTransferThreadClusterLengths_K0_M0_M1_K1
S<0, 2, 3, 1, 4>, // ABlockTransferThreadClusterArrangeOrder S<0, 2, 3, 1, 4>, // ABlockTransferThreadClusterArrangeOrder
S<0, 2, 3, 1, 4>, // ABlockTransferSrcAccessOrder S<0, 2, 3, 1, 4>, // ABlockTransferSrcAccessOrder
S<1, 4, 1, 1, 2>, // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 S<1, 1, 1, 1, 1>, // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1
S<0, 2, 3, 1, 4>, // ABlockTransferSrcVectorTensorContiguousDimOrder S<0, 2, 3, 1, 4>, // ABlockTransferSrcVectorTensorContiguousDimOrder
S<1, 1, 1, 1, 2>, // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 S<1, 1, 1, 1, 1>, // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1
S<1, 1, 1, 8, 2>, // BBlockTransferThreadSliceLengths_K0_N0_N1_K1 S<1, 1, 1, 8, 2>, // BBlockTransferThreadSliceLengths_K0_N0_N1_K1
S<1, 16, 1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N0_N1_K1 S<1, 16, 1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N0_N1_K1
S<0, 1, 4, 2, 3>, // BBlockTransferThreadClusterArrangeOrder S<0, 1, 4, 2, 3>, // BBlockTransferThreadClusterArrangeOrder
......
...@@ -138,17 +138,17 @@ struct DeviceGroupedConvBwdWeightGnwcGkxcGnwk_Dl ...@@ -138,17 +138,17 @@ struct DeviceGroupedConvBwdWeightGnwcGkxcGnwk_Dl
: public DeviceGroupedConvBwdWeight< : public DeviceGroupedConvBwdWeight<
NDimSpatial, NDimSpatial,
ck::tuple_element_t<NDimSpatial - 1, ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::NWC, ck::Tuple<ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::NHWC, ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::NDHWC>>, ck::tensor_layout::convolution::GNDHWC>>,
ck::tuple_element_t<NDimSpatial - 1, ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::KXC, ck::Tuple<ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::KYXC, ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::KZYXC>>, ck::tensor_layout::convolution::GKZYXC>>,
ck::tuple_element_t<NDimSpatial - 1, ck::tuple_element_t<NDimSpatial - 1,
ck::Tuple<ck::tensor_layout::convolution::NWK, ck::Tuple<ck::tensor_layout::convolution::GNWK,
ck::tensor_layout::convolution::NHWK, ck::tensor_layout::convolution::GNHWK,
ck::tensor_layout::convolution::NDHWK>>, ck::tensor_layout::convolution::GNDHWK>>,
InDataType, InDataType,
WeiDataType, WeiDataType,
OutDataType, OutDataType,
......
...@@ -812,11 +812,9 @@ struct GridwiseGemmDl_bkm_bkn_mn_v1r3 ...@@ -812,11 +812,9 @@ struct GridwiseGemmDl_bkm_bkn_mn_v1r3
} }
// HACK: this force m/n_block_data_idx_on_grid into SGPR // HACK: this force m/n_block_data_idx_on_grid into SGPR
const index_t m_block_data_idx_on_grid = const index_t m_block_data_idx_on_grid = __builtin_amdgcn_readfirstlane(block_work_idx[I1]);
__builtin_amdgcn_readfirstlane(block_work_idx[I1] * MPerBlock);
const index_t n_block_data_idx_on_grid = const index_t n_block_data_idx_on_grid = __builtin_amdgcn_readfirstlane(block_work_idx[I2]);
__builtin_amdgcn_readfirstlane(block_work_idx[I2] * NPerBlock);
// TODO: change this. I think it needs multi-dimensional alignment // TODO: change this. I think it needs multi-dimensional alignment
constexpr auto max_lds_align = K1; constexpr auto max_lds_align = K1;
...@@ -980,7 +978,7 @@ struct GridwiseGemmDl_bkm_bkn_mn_v1r3 ...@@ -980,7 +978,7 @@ struct GridwiseGemmDl_bkm_bkn_mn_v1r3
if constexpr(HasMainKBlockLoop) if constexpr(HasMainKBlockLoop)
{ {
const auto K0 = a_grid_desc_b_k0_m0_m1_k1.GetLength(I0); const auto K0 = a_grid_desc_b_k0_m0_m1_k1.GetLength(I1);
index_t k_block_data_begin = 0; index_t k_block_data_begin = 0;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment