Split the instances by architecture. (#1223)

* parse examples inside the add_example_executable function * fix the example 64 cmake file * add xdl flag to the gemm_bias_softmax_gemm_permute example * add filtering of tests based on architecture type * enable test_grouped_gemm for gfx9 only * enable test_transpose only for gfx9 * only link test_transpose if it gets built * split the gemm instances by architectures * split gemm_bilinear, grouped_conv_bwd_weight instances by targets * split instances by architecture * split grouped_conv instances by architecture * fix clang format * fix the if-else logic in group_conv headers * small fix for grouped convolution instances * fix the grouped conv bwd weight dl instances * fix client examples * only enable client examples 3 and 4 on gfx9 * set the gfx9 macro * make sure the architecture macros are set by cmake * use separate set of xdl/wmma flags for host code * simplify the main cmake file * add conv_fwd_bf8 instance declaration

Split the instances by architecture. (#1223)
* parse examples inside the add_example_executable function * fix the example 64 cmake file * add xdl flag to the gemm_bias_softmax_gemm_permute example * add filtering of tests based on architecture type * enable test_grouped_gemm for gfx9 only * enable test_transpose only for gfx9 * only link test_transpose if it gets built * split the gemm instances by architectures * split gemm_bilinear, grouped_conv_bwd_weight instances by targets * split instances by architecture * split grouped_conv instances by architecture * fix clang format * fix the if-else logic in group_conv headers * small fix for grouped convolution instances * fix the grouped conv bwd weight dl instances * fix client examples * only enable client examples 3 and 4 on gfx9 * set the gfx9 macro * make sure the architecture macros are set by cmake * use separate set of xdl/wmma flags for host code * simplify the main cmake file * add conv_fwd_bf8 instance declaration
ae57e593 · Illia Silin · GitHub · 303d4594 · ae57e593 · ae57e593
Unverified Commit ae57e593 authored Apr 02, 2024 by Illia Silin Committed by GitHub Apr 02, 2024
20 changed files
--- a/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
+++ b/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt
-list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
-set(target 0)
-foreach(gpu IN LISTS GPU_TARGETS)
- if(gpu IN_LIST gpu_list AND target EQUAL 0)
-    add_custom_target(test_batched_gemm_softmax_gemm_permute)
-    add_gtest_executable(test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16.cpp)
-    if(result EQUAL 0)
-      target_link_libraries(test_batched_gemm_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
-      add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_fp16)
-    endif()
-    add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_fp16 test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp)
-    if(result EQUAL 0)
-      target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
-      add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16)
-    endif()
-   
-    add_gtest_executable(test_batched_gemm_softmax_gemm_permute_bf16 test_batched_gemm_softmax_gemm_permute_bf16.cpp)
-    if(result EQUAL 0)
-      target_link_libraries(test_batched_gemm_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
-      add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_bf16)
-    endif()
-    add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_bf16 test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp)
-    if(result EQUAL 0)
-      target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
-      add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16)
-    endif()
-   set(target 1)
- endif()
-endforeach()
\ No newline at end of file
+add_custom_target(test_batched_gemm_softmax_gemm_permute)
+add_gtest_executable(test_batched_gemm_softmax_gemm_permute_fp16 test_batched_gemm_softmax_gemm_permute_fp16_xdl.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_batched_gemm_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
+  add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_fp16)
+endif()
+add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_fp16 test_batched_gemm_bias_softmax_gemm_permute_fp16_xdl.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_fp16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
+  add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_fp16)
+endif()
+add_gtest_executable(test_batched_gemm_softmax_gemm_permute_bf16 test_batched_gemm_softmax_gemm_permute_bf16_xdl.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_batched_gemm_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
+  add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_softmax_gemm_permute_bf16)
+endif()
+add_gtest_executable(test_batched_gemm_bias_softmax_gemm_permute_bf16 test_batched_gemm_bias_softmax_gemm_permute_bf16_xdl.cpp)
+if(result EQUAL 0)
+  target_link_libraries(test_batched_gemm_bias_softmax_gemm_permute_bf16 PRIVATE utility device_batched_gemm_softmax_gemm_permute_instance)
+  add_dependencies(test_batched_gemm_softmax_gemm_permute test_batched_gemm_bias_softmax_gemm_permute_bf16)
+endif()
--- a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp
+++ b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp
--- a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp
+++ b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp
--- a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp
+++ b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp
--- a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp
+++ b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp
--- a/test/contraction/CMakeLists.txt
+++ b/test/contraction/CMakeLists.txt
-list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
-set(target 0)
-foreach(gpu IN LISTS GPU_TARGETS)
-    if(gpu IN_LIST gpu_list AND target EQUAL 0)
-        if((DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64") OR NOT DEFINED DTYPES)
-            add_gtest_executable(test_contraction test_contraction.cpp)
-            target_link_libraries(test_contraction PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance)
-            add_gtest_executable(test_contraction_interface test_contraction_interface.cpp)
-            target_link_libraries(test_contraction_interface PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance)
-            set(target 1)
-        endif()
+if((DTYPES MATCHES "fp32" OR DTYPES MATCHES "fp64") OR NOT DEFINED DTYPES)
+    add_gtest_executable(test_contraction test_contraction_xdl.cpp)
+    if(result EQUAL 0)
+        target_link_libraries(test_contraction PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance)
    endif()
-endforeach()
+    add_gtest_executable(test_contraction_interface test_contraction_interface_xdl.cpp)
+    if(result EQUAL 0)
+        target_link_libraries(test_contraction_interface PRIVATE utility device_contraction_bilinear_instance device_contraction_scale_instance)
+    endif()
+endif()
--- a/test/contraction/test_contraction_interface.cpp
+++ b/test/contraction/test_contraction_interface.cpp
--- a/test/contraction/test_contraction.cpp
+++ b/test/contraction/test_contraction.cpp
--- a/test/convnd_bwd_data/CMakeLists.txt
+++ b/test/convnd_bwd_data/CMakeLists.txt
-list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
-set(target 0)
-foreach(gpu IN LISTS GPU_TARGETS)
- if(gpu IN_LIST gpu_list AND target EQUAL 0)
-   add_gtest_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
+add_gtest_executable(test_convnd_bwd_data convnd_bwd_data_xdl.cpp)
+if(result EQUAL 0)
   target_link_libraries(test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance)
-   set(target 1)
- endif()
-endforeach()
\ No newline at end of file
+endif()
--- a/test/convnd_bwd_data/convnd_bwd_data.cpp
+++ b/test/convnd_bwd_data/convnd_bwd_data.cpp
--- a/test/convnd_fwd/CMakeLists.txt
+++ b/test/convnd_fwd/CMakeLists.txt
-list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
-set(target 0)
-foreach(gpu IN LISTS GPU_TARGETS)
- if(gpu IN_LIST gpu_list AND target EQUAL 0)
-   add_gtest_executable(test_convnd_fwd convnd_fwd.cpp)
+add_gtest_executable(test_convnd_fwd convnd_fwd_xdl.cpp)
+if(result EQUAL 0)
   target_link_libraries(test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance)
-   set(target 1)
- endif()
-endforeach()
+endif()
--- a/test/convnd_fwd/convnd_fwd.cpp
+++ b/test/convnd_fwd/convnd_fwd.cpp
--- a/test/gemm_add/CMakeLists.txt
+++ b/test/gemm_add/CMakeLists.txt
-add_gtest_executable(test_gemm_add test_gemm_add.hpp)
-target_link_libraries(test_gemm_add PRIVATE utility device_gemm_add_instance)
+add_gtest_executable(test_gemm_add test_gemm_add_xdl.hpp)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_add PRIVATE utility device_gemm_add_instance)
+endif()

-add_gtest_executable(test_gemm_add_relu test_gemm_add_relu.cpp)
-target_link_libraries(test_gemm_add_relu PRIVATE utility device_gemm_add_instance device_gemm_add_relu_instance)
+add_gtest_executable(test_gemm_add_relu test_gemm_add_relu_xdl.cpp)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_add_relu PRIVATE utility device_gemm_add_instance device_gemm_add_relu_instance)
+endif()

-add_gtest_executable(test_gemm_add_silu test_gemm_add_silu.cpp)
-target_link_libraries(test_gemm_add_silu PRIVATE utility device_gemm_add_instance device_gemm_add_silu_instance)
+add_gtest_executable(test_gemm_add_silu test_gemm_add_silu_xdl.cpp)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_add_silu PRIVATE utility device_gemm_add_instance device_gemm_add_silu_instance)
+endif()

-add_gtest_executable(test_gemm_add_fastgelu test_gemm_add_fastgelu.cpp)
-target_link_libraries(test_gemm_add_fastgelu PRIVATE utility device_gemm_add_instance device_gemm_add_fastgelu_instance)
+add_gtest_executable(test_gemm_add_fastgelu test_gemm_add_fastgelu_xdl.cpp)
+if(result EQUAL 0)
+    target_link_libraries(test_gemm_add_fastgelu PRIVATE utility device_gemm_add_instance device_gemm_add_fastgelu_instance)
+endif()
--- a/test/gemm_add/test_gemm_add_fastgelu.cpp
+++ b/test/gemm_add/test_gemm_add_fastgelu.cpp
@@ -4,7 +4,7 @@
 #include "gtest/gtest.h"
 #include "ck/ck.hpp"
 #include "profiler/profile_gemm_add_fastgelu_impl.hpp"
-#include "test_gemm_add.hpp"
+#include "test_gemm_add_xdl.hpp"

 template <typename Tuple>
 class TestGemmAddFastgelu : public TestGemmAdd<Tuple>

--- a/test/gemm_add/test_gemm_add_relu.cpp
+++ b/test/gemm_add/test_gemm_add_relu.cpp
@@ -4,7 +4,7 @@
 #include "gtest/gtest.h"
 #include "ck/ck.hpp"
 #include "profiler/profile_gemm_add_relu_impl.hpp"
-#include "test_gemm_add.hpp"
+#include "test_gemm_add_xdl.hpp"

 template <typename Tuple>
 class TestGemmAddRelu : public TestGemmAdd<Tuple>

--- a/test/gemm_add/test_gemm_add_silu.cpp
+++ b/test/gemm_add/test_gemm_add_silu.cpp
@@ -4,7 +4,7 @@
 #include "gtest/gtest.h"
 #include "ck/ck.hpp"
 #include "profiler/profile_gemm_add_silu_impl.hpp"
-#include "test_gemm_add.hpp"
+#include "test_gemm_add_xdl.hpp"

 template <typename Tuple>
 class TestGemmAddSilu : public TestGemmAdd<Tuple>

--- a/test/gemm_add/test_gemm_add.hpp
+++ b/test/gemm_add/test_gemm_add.hpp
--- a/test/gemm_layernorm/CMakeLists.txt
+++ b/test/gemm_layernorm/CMakeLists.txt
-list(APPEND gpu_list gfx908 gfx90a gfx940 gfx941 gfx942)
-set(target 0)
-foreach(gpu IN LISTS GPU_TARGETS)
- if(gpu IN_LIST gpu_list AND target EQUAL 0)
-   add_custom_target(test_gemm_layernorm)
-   add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16.cpp)
-   if(result EQUAL 0)
-     target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
-     add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
-     set(target 1)
-   endif()
- endif()
-endforeach()
+add_gtest_executable(test_gemm_add_relu_add_layernorm_fp16 test_gemm_add_relu_add_layernorm_fp16_xdl.cpp)
+if(result EQUAL 0)
+  add_custom_target(test_gemm_layernorm)
+  target_link_libraries(test_gemm_add_relu_add_layernorm_fp16 PRIVATE utility device_gemm_add_relu_add_layernorm_instance)
+  add_dependencies(test_gemm_layernorm test_gemm_add_relu_add_layernorm_fp16)
+endif()
--- a/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp
+++ b/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp
--- a/test/gemm_reduce/CMakeLists.txt
+++ b/test/gemm_reduce/CMakeLists.txt
-add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
+add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16_xdl.cpp)
 if(result EQUAL 0)
  target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility device_gemm_reduce_instance)
 endif()
\ No newline at end of file