Commit 25751e37 authored by Jing Zhang

add selection of device_instances

parent 8160c31a
@@ -84,10 +84,11 @@ install(TARGETS device_conv2d_fwd_bias_relu_atomic_add_instance LIBRARY DESTINAT
 set(PROFILER_SOURCE
     profiler.cpp
     profile_gemm.cpp
-    profile_conv_fwd.cpp
-    profile_conv_fwd_bias_relu.cpp
-    profile_conv_fwd_bias_relu_add.cpp
-    profile_conv_fwd_bias_relu_atomic_add.cpp)
+    #profile_conv_fwd.cpp
+    #profile_conv_fwd_bias_relu.cpp
+    #profile_conv_fwd_bias_relu_add.cpp
+    #profile_conv_fwd_bias_relu_atomic_add.cpp
+)
 
 add_executable(ckProfiler ${PROFILER_SOURCE})
 target_link_libraries(ckProfiler PRIVATE host_tensor)
...
@@ -7,6 +7,11 @@ namespace tensor_operation {
 namespace device {
 namespace device_gemm_instance {
 
+using DeviceGemmNoOpPtr =
+    ck::tensor_operation::device::DeviceGemmPtr<ck::tensor_operation::element_wise::PassThrough,
+                                                ck::tensor_operation::element_wise::PassThrough,
+                                                ck::tensor_operation::element_wise::PassThrough>;
+
 #if 0
 template <>
 void add_device_gemm_instance<float,
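Note: DeviceGemmNoOpPtr names a device-GEMM handle whose A/B/C element-wise operations are all PassThrough (i.e. no-ops). A minimal sketch of how the selection code uses it; the container name gemm_ptrs and the add_*_instances functions come from profile_gemm.cpp below, while the surrounding scaffolding is an illustrative assumption:

    #include <vector>

    using ck::tensor_operation::device::device_gemm_instance::DeviceGemmNoOpPtr;

    // Collect every registered no-op GEMM instance into one container;
    // the profiler then benchmarks each entry in turn.
    std::vector<DeviceGemmNoOpPtr> gemm_ptrs;
    ck::tensor_operation::device::device_gemm_instance::
        add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemm_ptrs);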
@@ -175,15 +180,77 @@ void profile_gemm_impl(int do_verification,
     if(KBatch > 1 && is_same<ADataType, float>::value)
     {
-        ck::tensor_operation::device::device_gemm_instance::
-            add_device_splitk_gemm_instance<float, float, float, ALayout, BLayout, CLayout>(
-                gemm_ptrs);
+        // ck::tensor_operation::device::device_gemm_instance::
+        //     add_device_splitk_gemm_instance<float, float, float, ALayout, BLayout, CLayout>(
+        //         gemm_ptrs);
     }
     else
     {
-        ck::tensor_operation::device::device_gemm_instance::
-            add_device_gemm_instance<ADataType, BDataType, CDataType, ALayout, BLayout, CLayout>(
-                gemm_ptrs);
+        if(is_same<ADataType, float>::value && is_same<BDataType, float>::value &&
+           is_same<CDataType, float>::value)
+        {
+            if(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
+               is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
+               is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f32_f32_f32_mk_kn_mn_instances(gemm_ptrs);
+            }
+            else if(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
+                    is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f32_f32_f32_mk_nk_mn_instances(gemm_ptrs);
+            }
+            else if(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
+                    is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f32_f32_f32_km_kn_mn_instances(gemm_ptrs);
+            }
+            else if(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f32_f32_f32_km_nk_mn_instances(gemm_ptrs);
+            }
+        }
+        else if(is_same<ADataType, half_t>::value && is_same<BDataType, half_t>::value &&
+                is_same<CDataType, half_t>::value)
+        {
+            if(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
+               is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
+               is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f16_f16_f16_mk_kn_mn_instances(gemm_ptrs);
+            }
+            else if(is_same<ALayout, tensor_layout::gemm::RowMajor>::value &&
+                    is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f16_f16_f16_mk_nk_mn_instances(gemm_ptrs);
+            }
+            else if(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<BLayout, tensor_layout::gemm::RowMajor>::value &&
+                    is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f16_f16_f16_km_kn_mn_instances(gemm_ptrs);
+            }
+            else if(is_same<ALayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<BLayout, tensor_layout::gemm::ColumnMajor>::value &&
+                    is_same<CLayout, tensor_layout::gemm::RowMajor>::value)
+            {
+                ck::tensor_operation::device::device_gemm_instance::
+                    add_device_gemm_xdl_f16_f16_f16_km_nk_mn_instances(gemm_ptrs);
+            }
+        }
     }
 
     if(gemm_ptrs.size() <= 0)
...
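The rewritten else-branch replaces the single templated add_device_gemm_instance call with explicit per-combination selection: is_same tests on the data-type and layout template parameters pick exactly one add_device_gemm_xdl_* registration function for each (type, layout) pairing. Because ADataType, ALayout, etc. are compile-time template parameters, each test folds to a constant and the untaken branches are trivially dead. A reduced, self-contained sketch of the same dispatch shape, using stand-in tag types rather than the real CK headers:

    #include <type_traits>
    #include <vector>

    struct Row {}; // stands in for tensor_layout::gemm::RowMajor
    struct Col {}; // stands in for tensor_layout::gemm::ColumnMajor

    using InstancePtr = int; // placeholder for DeviceGemmNoOpPtr

    void add_f32_mk_kn_mn(std::vector<InstancePtr>& v) { v.push_back(0); }
    void add_f32_mk_nk_mn(std::vector<InstancePtr>& v) { v.push_back(1); }

    template <typename ADataType, typename ALayout, typename BLayout>
    void select_instances(std::vector<InstancePtr>& ptrs)
    {
        // Same chain-of-is_same shape as the diff: only the branch matching
        // this instantiation's template arguments registers instances.
        if(std::is_same<ADataType, float>::value)
        {
            if(std::is_same<ALayout, Row>::value && std::is_same<BLayout, Row>::value)
                add_f32_mk_kn_mn(ptrs);
            else if(std::is_same<ALayout, Row>::value && std::is_same<BLayout, Col>::value)
                add_f32_mk_nk_mn(ptrs);
        }
    }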
@@ -6,10 +6,10 @@
 #include <half.hpp>
 
 int profile_gemm(int, char*[]);
-int profile_conv_fwd(int, char*[]);
-int profile_conv_fwd_bias_relu(int, char*[]);
-int profile_conv_fwd_bias_relu_add(int, char*[]);
-int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
+// int profile_conv_fwd(int, char*[]);
+// int profile_conv_fwd_bias_relu(int, char*[]);
+// int profile_conv_fwd_bias_relu_add(int, char*[]);
+// int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
 
 int main(int argc, char* argv[])
 {
@@ -17,6 +17,7 @@ int main(int argc, char* argv[])
     {
         return profile_gemm(argc, argv);
     }
+#if 0
     else if(strcmp(argv[1], "conv_fwd") == 0)
     {
         return profile_conv_fwd(argc, argv);
@@ -33,6 +34,7 @@ int main(int argc, char* argv[])
     {
         return profile_conv_fwd_bias_relu_atomic_add(argc, argv);
     }
+#endif
     else
     {
         printf("arg1: tensor operation (gemm: GEMM;\n"
...
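Net effect at the command line: with the conv declarations commented out and the conv dispatch wrapped in #if 0, only the gemm path remains reachable, and any conv_* arg1 now falls through to the usage message. Assuming the executable name from the CMake hunk above, the surviving invocation is of the form:

    ./ckProfiler gemm <existing gemm arguments>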