Disable tests for gfx90a

f65bb1a1 · Bartlomiej Kocot · 0f48e38a · f65bb1a1 · f65bb1a1 · f65bb1a1
Commit f65bb1a1 authored Jun 11, 2023 by Bartlomiej Kocot
3 changed files
--- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
+++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
@@ -70,9 +70,10 @@ __global__ void
            const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
            const Block2CTileMap block_2_ctile_map)
 {
-#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) ||             \
+// TODO: Enable for gfx90a after compiler fix
-    defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx1030__) || defined(__gfx1100__) || \
+#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) ||              \
-    defined(__gfx1101__) || defined(__gfx1102__))
+    defined(__gfx940__) || defined(__gfx1030__) || defined(__gfx1100__) || defined(__gfx1101__) || \
+    defined(__gfx1102__))
    const index_t num_blocks_per_batch =
        __builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
@@ -648,10 +649,11 @@ struct DeviceBatchedGemmMultipleD_Dl : public DeviceBatchedGemmMultiD<ALayout,
    static bool IsSupportedArgument(const Argument& arg)
    {
+        // TODO: Enable for gfx90a after compiler fix
        if(ck::get_device_name() == "gfx906" || ck::get_device_name() == "gfx908" ||
-           ck::get_device_name() == "gfx90a" || ck::get_device_name() == "gfx1030" ||
+           ck::get_device_name() == "gfx1030" || ck::get_device_name() == "gfx940" ||
-           ck::get_device_name() == "gfx940" || ck::get_device_name() == "gfx1100" ||
+           ck::get_device_name() == "gfx1100" || ck::get_device_name() == "gfx1101" ||
-           ck::get_device_name() == "gfx1101" || ck::get_device_name() == "gfx1102")
+           ck::get_device_name() == "gfx1102")
        {
            bool pass = true;
            pass      = pass && arg.K_ % K1 == 0;

--- a/profiler/include/profiler/profile_batched_gemm_impl.hpp
+++ b/profiler/include/profiler/profile_batched_gemm_impl.hpp
@@ -141,7 +141,7 @@ bool profile_batched_gemm_impl(int do_verification,
    for(auto& op_ptr : op_ptrs)
    {
        std::unique_ptr<tensor_operation::device::BaseArgument> argument_ptr;
-        // true branch for multi d dl kernel
+        // false branch for multi d dl kernel
        if constexpr(std::is_same<
                         DeviceOp,
                         ck::tensor_operation::device::DeviceBatchedGemm<ALayout,

--- a/test/batched_gemm_multi_d/CMakeLists.txt
+++ b/test/batched_gemm_multi_d/CMakeLists.txt
-add_gtest_executable(test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp)
+# TODO: Enable for gfx90a after compiler fix
-target_link_libraries(test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance)
+if(NOT GPU_TARGETS MATCHES "gfx90a")
+    add_gtest_executable(test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp)
+    target_link_libraries(test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance)
+endif()