Merge branch 'develop' of...

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into add_int8_wmma_example_instance

Merge branch 'develop' of...
Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/composable_kernel into add_int8_wmma_example_instance
e24f37fb · aska-0096 · 9697ad4e · 85e2e1e2 · e24f37fb · e24f37fb
Commit e24f37fb authored Sep 12, 2023 by aska-0096
20 changed files
--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "gemm_reduce_xdl_common.hpp"


--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "gemm_reduce_xdl_common.hpp"


--- a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "gemm_reduce_xdl_common.hpp"


--- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "gemm_reduce_xdl_common.hpp"


--- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "gemm_reduce_xdl_common.hpp"


--- a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "gemm_reduce_xdl_common.hpp"


--- a/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp
+++ b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <numeric>
 #include <initializer_list>

--- a/example/17_convnd_bwd_data/CMakeLists.txt
+++ b/example/17_convnd_bwd_data/CMakeLists.txt
-if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940")
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
+set(target 0)
+foreach(gpu IN LISTS GPU_TARGETS)
+ if(gpu IN_LIST gpu_list AND target EQUAL 0)
   add_example_executable(example_convnd_bwd_data_xdl_fp16 convnd_bwd_data_xdl_fp16.cpp)
   target_link_libraries(example_convnd_bwd_data_xdl_fp16 PRIVATE utility)
+   set(target 1)
+ endif()
+endforeach()
+  if(DL_KERNELS)
+    add_example_executable(example_convnd_bwd_data_dl_fp16 convnd_bwd_data_dl_fp16.cpp)
+    target_link_libraries(example_convnd_bwd_data_dl_fp16 PRIVATE utility)
+  endif()
 endif()
-add_example_executable(example_convnd_bwd_data_dl_fp16 convnd_bwd_data_dl_fp16.cpp)
-target_link_libraries(example_convnd_bwd_data_dl_fp16 PRIVATE utility)
--- a/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
+++ b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <iostream>
 #include <numeric>
@@ -81,32 +81,33 @@ int run_conv_bwd_data(bool do_verification,
    in_device_buf.SetZero();

    // do GEMM
-    auto conv     = DeviceConvNdBwdDataInstance{};
-    auto invoker  = conv.MakeInvoker();
-    auto argument = conv.MakeArgument(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
-                                      static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
-                                      static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
-                                      conv_param.N_,
-                                      conv_param.K_,
-                                      conv_param.C_,
-                                      conv_param.input_spatial_lengths_,
-                                      conv_param.filter_spatial_lengths_,
-                                      conv_param.GetOutputSpatialLengths(),
-                                      conv_param.conv_filter_strides_,
-                                      conv_param.conv_filter_dilations_,
-                                      conv_param.input_left_pads_,
-                                      conv_param.input_right_pads_,
-                                      in_element_op,
-                                      wei_element_op,
-                                      out_element_op);
-
-    if(!conv.IsSupportedArgument(argument))
+    auto conv    = DeviceConvNdBwdDataInstance{};
+    auto invoker = conv.MakeInvoker();
+    auto argument =
+        conv.MakeArgumentPointer(static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
+                                 static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
+                                 static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
+                                 conv_param.N_,
+                                 conv_param.K_,
+                                 conv_param.C_,
+                                 conv_param.input_spatial_lengths_,
+                                 conv_param.filter_spatial_lengths_,
+                                 conv_param.GetOutputSpatialLengths(),
+                                 conv_param.conv_filter_strides_,
+                                 conv_param.conv_filter_dilations_,
+                                 conv_param.input_left_pads_,
+                                 conv_param.input_right_pads_,
+                                 in_element_op,
+                                 wei_element_op,
+                                 out_element_op);
+
+    if(!conv.IsSupportedArgument(argument.get()))
    {
        std::cout << "Not support,please check parameters or device";
        return 0;
    }

-    float ave_time = invoker.Run(argument, StreamConfig{nullptr, time_kernel});
+    float ave_time = invoker.Run(argument.get(), StreamConfig{nullptr, time_kernel});

    std::size_t flop      = conv_param.GetFlops();
    std::size_t num_btype = conv_param.GetByte<InDataType, WeiDataType, OutDataType>();

--- a/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp
+++ b/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "convnd_bwd_data_common.hpp"


--- a/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp
+++ b/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "convnd_bwd_data_common.hpp"


--- a/example/18_batched_gemm_reduce/CMakeLists.txt
+++ b/example/18_batched_gemm_reduce/CMakeLists.txt
-if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940")
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
+set(target 0)
+foreach(gpu IN LISTS GPU_TARGETS)
+ if(gpu IN_LIST gpu_list AND target EQUAL 0)
   add_example_executable(example_batched_gemm_reduce_xdl_fp16 batched_gemm_reduce_xdl_fp16.cpp)
+   set(target 1)
+ endif()
+endforeach()
 endif()
-
--- a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
+++ b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <iostream>
 #include <numeric>

--- a/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
+++ b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <iostream>
 #include <cstdlib>

--- a/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
+++ b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <iostream>
 #include <cstdlib>

--- a/example/19_binary_elementwise/elementwise_add_1d.cpp
+++ b/example/19_binary_elementwise/elementwise_add_1d.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <iostream>
 #include <cstdlib>

--- a/example/19_binary_elementwise/elementwise_add_4d.cpp
+++ b/example/19_binary_elementwise/elementwise_add_4d.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include <iostream>
 #include <cstdlib>

--- a/example/20_grouped_conv_bwd_weight/CMakeLists.txt
+++ b/example/20_grouped_conv_bwd_weight/CMakeLists.txt
-if(GPU_TARGETS MATCHES "gfx908" OR GPU_TARGETS MATCHES "gfx90a" OR GPU_TARGETS MATCHES "gfx940")
+list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
+set(target 0)
+foreach(gpu IN LISTS GPU_TARGETS)
+ if(gpu IN_LIST gpu_list AND target EQUAL 0)
   add_custom_target(example_grouped_conv_bwd_weight)
+   if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+    add_example_executable(example_grouped_conv_bwd_weight_xdl_fp16 grouped_conv_bwd_weight_xdl_fp16.cpp)
+    add_dependencies(example_grouped_conv_bwd_weight example_grouped_conv_bwd_weight_xdl_fp16)
+   endif()
+   if(DTYPES MATCHES "bf16" OR NOT DEFINED DTYPES)
+    add_example_executable(example_grouped_conv_bwd_weight_xdl_bf16 grouped_conv_bwd_weight_xdl_bf16.cpp)
+    add_dependencies(example_grouped_conv_bwd_weight example_grouped_conv_bwd_weight_xdl_bf16)
+   endif()
+   set(target 1)
+ endif()
+endforeach()

-   add_example_executable(example_grouped_conv_bwd_weight_xdl_fp16 grouped_conv_bwd_weight_xdl_fp16.cpp)
-   add_example_executable(example_grouped_conv_bwd_weight_xdl_bf16 grouped_conv_bwd_weight_xdl_bf16.cpp)
-
-   add_dependencies(example_grouped_conv_bwd_weight example_grouped_conv_bwd_weight_xdl_fp16
-                                                 example_grouped_conv_bwd_weight_xdl_bf16)
-endif()
-
-add_custom_target(example_grouped_conv_bwd_weight_dl)
-
-add_example_executable(example_grouped_conv_bwd_weight_dl_fp16 grouped_conv_bwd_weight_dl_fp16.cpp)
-
-add_dependencies(example_grouped_conv_bwd_weight_dl example_grouped_conv_bwd_weight_dl_fp16)
+if(DTYPES MATCHES "fp16" OR NOT DEFINED DTYPES)
+  if(DL_KERNELS)
+    add_custom_target(example_grouped_conv_bwd_weight_dl)
+    add_example_executable(example_grouped_conv_bwd_weight_dl_fp16 grouped_conv_bwd_weight_dl_fp16.cpp)
+    add_dependencies(example_grouped_conv_bwd_weight_dl example_grouped_conv_bwd_weight_dl_fp16)
+  endif()
+endif()
\ No newline at end of file
--- a/example/20_grouped_conv_bwd_weight/common.hpp
+++ b/example/20_grouped_conv_bwd_weight/common.hpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #pragma once


--- a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp
+++ b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.

 #include "common.hpp"

-#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_gnwc_gkxc_gnwk_xdl_cshuffle.hpp"
+#include "ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp"

 using InDataType = BF16;
 // bf16 kernel use fp32 atomic add to accumulate Weight tensor into global memory
@@ -17,8 +17,20 @@ using OutElementOp = PassThrough;

 template <ck::index_t NDimSpatial>
 using DeviceConvBwdWeightInstance =
-    ck::tensor_operation::device::DeviceGroupedConvBwdWeightGnwcGkxcGnwk_Xdl_CShuffle<
-        NDimSpatial,          // NDimSpatial
+    ck::tensor_operation::device::DeviceGroupedConvBwdWeight_Xdl_CShuffle<
+        NDimSpatial,
+        ck::tuple_element_t<NDimSpatial - 1,
+                            ck::Tuple<ck::tensor_layout::convolution::GNWC,
+                                      ck::tensor_layout::convolution::GNHWC,
+                                      ck::tensor_layout::convolution::GNDHWC>>,
+        ck::tuple_element_t<NDimSpatial - 1,
+                            ck::Tuple<ck::tensor_layout::convolution::GKXC,
+                                      ck::tensor_layout::convolution::GKYXC,
+                                      ck::tensor_layout::convolution::GKZYXC>>,
+        ck::tuple_element_t<NDimSpatial - 1,
+                            ck::Tuple<ck::tensor_layout::convolution::GNWK,
+                                      ck::tensor_layout::convolution::GNHWK,
+                                      ck::tensor_layout::convolution::GNDHWK>>,
        InDataType,           // InDataType
        WeiDataType,          // WeiDataType
        OutDataType,          // OutDataType