Commit da3f38fe authored by Rosty Geyyer
Browse files

Format

parent 58af805f
add_example_executable(example_convnd_bwd_weight_xdl_fp16 convnd_bwd_weight_xdl_fp16.cpp) add_example_executable(example_convnd_bwd_weight_xdl_fp16 convnd_bwd_weight_xdl_fp16.cpp)
add_example_executable(example_convnd_bwd_weight_xdl_bf16 convnd_bwd_weight_xdl_bf16.cpp) add_example_executable(example_convnd_bwd_weight_xdl_bf16 convnd_bwd_weight_xdl_bf16.cpp)
add_example_executable(example_convnd_bwd_weight_dl_fp16 convnd_bwd_weight_dl_fp16.cpp) add_example_executable(example_convnd_bwd_weight_dl_fp16 convnd_bwd_weight_dl_fp16.cpp)
target_link_libraries(example_convnd_bwd_weight_xdl_fp16 PRIVATE utility) target_link_libraries(example_convnd_bwd_weight_xdl_fp16 PRIVATE utility)
target_link_libraries(example_convnd_bwd_weight_xdl_bf16 PRIVATE utility) target_link_libraries(example_convnd_bwd_weight_xdl_bf16 PRIVATE utility)
target_link_libraries(example_convnd_bwd_weight_dl_fp16 PRIVATE utility) target_link_libraries(example_convnd_bwd_weight_dl_fp16 PRIVATE utility)
...@@ -23,42 +23,42 @@ static constexpr auto ConvBwdWeightDefault = ...@@ -23,42 +23,42 @@ static constexpr auto ConvBwdWeightDefault =
template <ck::index_t NDimSpatial> template <ck::index_t NDimSpatial>
using DeviceConvndBwdWeightInstance = using DeviceConvndBwdWeightInstance =
ck::tensor_operation::device::DeviceConvNdBwdWeightNwcKxcNwk_Dl< ck::tensor_operation::device::DeviceConvNdBwdWeightNwcKxcNwk_Dl<
NDimSpatial, // NDimSpatial NDimSpatial, // NDimSpatial
InDataType, // InDataType InDataType, // InDataType
WeiDataType, // WeiDataType WeiDataType, // WeiDataType
OutDataType, // OutDataType OutDataType, // OutDataType
AccDataType, // AccDataType AccDataType, // AccDataType
InElementOp, // InElementwiseOperation InElementOp, // InElementwiseOperation
WeiElementOp, // WeiElementwiseOperation WeiElementOp, // WeiElementwiseOperation
OutElementOp, // OutElementwiseOperation OutElementOp, // OutElementwiseOperation
ConvBwdWeightDefault, // ConvBackwardWeightSpecialization ConvBwdWeightDefault, // ConvBackwardWeightSpecialization
256, // BlockSize 256, // BlockSize
128, // MPerBlock 128, // MPerBlock
128, // NPerBlock 128, // NPerBlock
16, // K0PerBlock 16, // K0PerBlock
2, // K1 2, // K1
4, // M1PerThread 4, // M1PerThread
4, // N1PerThread 4, // N1PerThread
1, // KPerThread 1, // KPerThread
S<8, 2>, // M1N1ThreadClusterM1Xs S<8, 2>, // M1N1ThreadClusterM1Xs
S<8, 2>, // M1N1ThreadClusterN1Xs S<8, 2>, // M1N1ThreadClusterN1Xs
S<8, 1, 1, 2>, // ABlockTransferThreadSliceLengths_K0_M0_M1_K1 S<8, 1, 1, 2>, // ABlockTransferThreadSliceLengths_K0_M0_M1_K1
S<2, 1, 128, 1>, // ABlockTransferThreadClusterLengths_K0_M0_M1_K1 S<2, 1, 128, 1>, // ABlockTransferThreadClusterLengths_K0_M0_M1_K1
S<1, 2, 0, 3>, // ABlockTransferThreadClusterArrangeOrder S<1, 2, 0, 3>, // ABlockTransferThreadClusterArrangeOrder
S<1, 2, 0, 3>, // ABlockTransferSrcAccessOrder S<1, 2, 0, 3>, // ABlockTransferSrcAccessOrder
S<4, 1, 1, 2>, // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1 S<4, 1, 1, 2>, // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1
S<1, 2, 0, 3>, // ABlockTransferSrcVectorTensorContiguousDimOrder S<1, 2, 0, 3>, // ABlockTransferSrcVectorTensorContiguousDimOrder
S<1, 1, 1, 2>, // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1 S<1, 1, 1, 2>, // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1
S<1, 1, 8, 2>, // BBlockTransferThreadSliceLengths_K0_N0_N1_K1 S<1, 1, 8, 2>, // BBlockTransferThreadSliceLengths_K0_N0_N1_K1
S<16, 1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N0_N1_K1 S<16, 1, 16, 1>, // BBlockTransferThreadClusterLengths_K0_N0_N1_K1
S<0, 3, 1, 2>, // BBlockTransferThreadClusterArrangeOrder S<0, 3, 1, 2>, // BBlockTransferThreadClusterArrangeOrder
S<0, 3, 1, 2>, // BBlockTransferSrcAccessOrder S<0, 3, 1, 2>, // BBlockTransferSrcAccessOrder
S<1, 1, 8, 1>, // BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1 S<1, 1, 8, 1>, // BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1
S<0, 3, 1, 2>, // BBlockTransferSrcVectorTensorContiguousDimOrder S<0, 3, 1, 2>, // BBlockTransferSrcVectorTensorContiguousDimOrder
S<1, 1, 1, 2>, // BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1 S<1, 1, 1, 2>, // BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1
S<0, 1, 2, 3, 4, 5>, // CThreadTransferSrcDstAccessOrder S<0, 1, 2, 3, 4, 5>, // CThreadTransferSrcDstAccessOrder
5, // CThreadTransferSrcDstVectorDim 5, // CThreadTransferSrcDstVectorDim
4>; // CThreadTransferDstScalarPerVector 4>; // CThreadTransferDstScalarPerVector
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
......
...@@ -1328,8 +1328,7 @@ struct DeviceConvNdBwdWeightNwcKxcNwk_Dl ...@@ -1328,8 +1328,7 @@ struct DeviceConvNdBwdWeightNwcKxcNwk_Dl
arg.b_grid_desc_k0_n_k1_container_[i], arg.b_grid_desc_k0_n_k1_container_[i],
arg.c_grid_desc_m_n_container_[i])) arg.c_grid_desc_m_n_container_[i]))
{ {
throw std::runtime_error( throw std::runtime_error("wrong! GridwiseGemm has invalid setting");
"wrong! GridwiseGemm has invalid setting");
} }
const index_t grid_size = arg.block_2_ctile_map_container_[i].CalculateGridSize( const index_t grid_size = arg.block_2_ctile_map_container_[i].CalculateGridSize(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment