Format

da3f38fe · Rosty Geyyer · 58af805f · da3f38fe · da3f38fe · da3f38fe
Commit da3f38fe authored Nov 11, 2022 by Rosty Geyyer
3 changed files
--- a/example/20_convnd_bwd_weight/CMakeLists.txt
+++ b/example/20_convnd_bwd_weight/CMakeLists.txt
 add_example_executable(example_convnd_bwd_weight_xdl_fp16 convnd_bwd_weight_xdl_fp16.cpp)
-add_example_executable(example_convnd_bwd_weight_xdl_bf16 convnd_bwd_weight_xdl_bf16.cpp)
+add_example_executable(example_convnd_bwd_weight_xdl_bf16 convnd_bwd_weight_xdl_bf16.cpp)
-add_example_executable(example_convnd_bwd_weight_dl_fp16 convnd_bwd_weight_dl_fp16.cpp)
+add_example_executable(example_convnd_bwd_weight_dl_fp16 convnd_bwd_weight_dl_fp16.cpp)
-target_link_libraries(example_convnd_bwd_weight_xdl_fp16 PRIVATE utility)
+target_link_libraries(example_convnd_bwd_weight_xdl_fp16 PRIVATE utility)
-target_link_libraries(example_convnd_bwd_weight_xdl_bf16 PRIVATE utility)
+target_link_libraries(example_convnd_bwd_weight_xdl_bf16 PRIVATE utility)
-target_link_libraries(example_convnd_bwd_weight_dl_fp16 PRIVATE utility)
+target_link_libraries(example_convnd_bwd_weight_dl_fp16 PRIVATE utility)
--- a/example/20_convnd_bwd_weight/convnd_bwd_weight_dl_fp16.cpp
+++ b/example/20_convnd_bwd_weight/convnd_bwd_weight_dl_fp16.cpp
@@ -23,42 +23,42 @@ static constexpr auto ConvBwdWeightDefault =
 template <ck::index_t NDimSpatial>
 using DeviceConvndBwdWeightInstance =
    ck::tensor_operation::device::DeviceConvNdBwdWeightNwcKxcNwk_Dl<
-        NDimSpatial,    // NDimSpatial
+        NDimSpatial,          // NDimSpatial
-        InDataType,     // InDataType
+        InDataType,           // InDataType
-        WeiDataType,    // WeiDataType
+        WeiDataType,          // WeiDataType
-        OutDataType,    // OutDataType
+        OutDataType,          // OutDataType
-        AccDataType,    // AccDataType
+        AccDataType,          // AccDataType
-        InElementOp,    // InElementwiseOperation
+        InElementOp,          // InElementwiseOperation
-        WeiElementOp,   // WeiElementwiseOperation
+        WeiElementOp,         // WeiElementwiseOperation
-        OutElementOp,   // OutElementwiseOperation
+        OutElementOp,         // OutElementwiseOperation
-        ConvBwdWeightDefault,   // ConvBackwardWeightSpecialization
+        ConvBwdWeightDefault, // ConvBackwardWeightSpecialization
-        256,    // BlockSize
+        256,                  // BlockSize
-        128,    // MPerBlock
+        128,                  // MPerBlock
-        128,    // NPerBlock
+        128,                  // NPerBlock
-        16,     // K0PerBlock
+        16,                   // K0PerBlock
-        2,      // K1
+        2,                    // K1
-        4,      // M1PerThread
+        4,                    // M1PerThread
-        4,      // N1PerThread
+        4,                    // N1PerThread
-        1,      // KPerThread
+        1,                    // KPerThread
-        S<8, 2>,    // M1N1ThreadClusterM1Xs
+        S<8, 2>,              // M1N1ThreadClusterM1Xs
-        S<8, 2>,    // M1N1ThreadClusterN1Xs
+        S<8, 2>,              // M1N1ThreadClusterN1Xs
-        S<8, 1, 1, 2>,      // ABlockTransferThreadSliceLengths_K0_M0_M1_K1
+        S<8, 1, 1, 2>,        // ABlockTransferThreadSliceLengths_K0_M0_M1_K1
-        S<2, 1, 128, 1>,    // ABlockTransferThreadClusterLengths_K0_M0_M1_K1
+        S<2, 1, 128, 1>,      // ABlockTransferThreadClusterLengths_K0_M0_M1_K1
-        S<1, 2, 0, 3>,      // ABlockTransferThreadClusterArrangeOrder
+        S<1, 2, 0, 3>,        // ABlockTransferThreadClusterArrangeOrder
-        S<1, 2, 0, 3>,      // ABlockTransferSrcAccessOrder
+        S<1, 2, 0, 3>,        // ABlockTransferSrcAccessOrder
-        S<4, 1, 1, 2>,      // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1
+        S<4, 1, 1, 2>,        // ABlockTransferSrcVectorTensorLengths_K0_M0_M1_K1
-        S<1, 2, 0, 3>,      // ABlockTransferSrcVectorTensorContiguousDimOrder
+        S<1, 2, 0, 3>,        // ABlockTransferSrcVectorTensorContiguousDimOrder
-        S<1, 1, 1, 2>,      // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1
+        S<1, 1, 1, 2>,        // ABlockTransferDstVectorTensorLengths_K0_M0_M1_K1
-        S<1, 1, 8, 2>,      // BBlockTransferThreadSliceLengths_K0_N0_N1_K1
+        S<1, 1, 8, 2>,        // BBlockTransferThreadSliceLengths_K0_N0_N1_K1
-        S<16, 1, 16, 1>,    // BBlockTransferThreadClusterLengths_K0_N0_N1_K1
+        S<16, 1, 16, 1>,      // BBlockTransferThreadClusterLengths_K0_N0_N1_K1
-        S<0, 3, 1, 2>,      // BBlockTransferThreadClusterArrangeOrder
+        S<0, 3, 1, 2>,        // BBlockTransferThreadClusterArrangeOrder
-        S<0, 3, 1, 2>,      // BBlockTransferSrcAccessOrder
+        S<0, 3, 1, 2>,        // BBlockTransferSrcAccessOrder
-        S<1, 1, 8, 1>,      // BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1
+        S<1, 1, 8, 1>,        // BBlockTransferSrcVectorTensorLengths_K0_N0_N1_K1
-        S<0, 3, 1, 2>,      // BBlockTransferSrcVectorTensorContiguousDimOrder
+        S<0, 3, 1, 2>,        // BBlockTransferSrcVectorTensorContiguousDimOrder
-        S<1, 1, 1, 2>,      // BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1
+        S<1, 1, 1, 2>,        // BBlockTransferDstVectorTensorLengths_K0_N0_N1_K1
-        S<0, 1, 2, 3, 4, 5>,    // CThreadTransferSrcDstAccessOrder
+        S<0, 1, 2, 3, 4, 5>,  // CThreadTransferSrcDstAccessOrder
-        5,      // CThreadTransferSrcDstVectorDim
+        5,                    // CThreadTransferSrcDstVectorDim
-        4>;     // CThreadTransferDstScalarPerVector
+        4>;                   // CThreadTransferDstScalarPerVector
 int main(int argc, char* argv[])
 {

--- a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_weight_nwc_kxc_nwk_dl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_weight_nwc_kxc_nwk_dl.hpp
@@ -1328,8 +1328,7 @@ struct DeviceConvNdBwdWeightNwcKxcNwk_Dl
                                                arg.b_grid_desc_k0_n_k1_container_[i],
                                                arg.c_grid_desc_m_n_container_[i]))
                {
-                    throw std::runtime_error(
+                    throw std::runtime_error("wrong! GridwiseGemm has invalid setting");
-                        "wrong! GridwiseGemm has invalid setting");
                }
                const index_t grid_size = arg.block_2_ctile_map_container_[i].CalculateGridSize(