Commit f65bb1a1 authored by Bartlomiej Kocot
Browse files

Disable tests for gfx90a

parent 0f48e38a
...@@ -70,9 +70,10 @@ __global__ void ...@@ -70,9 +70,10 @@ __global__ void
const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch, const ComputePtrOffsetOfBatch compute_ptr_offset_of_batch,
const Block2CTileMap block_2_ctile_map) const Block2CTileMap block_2_ctile_map)
{ {
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \ // TODO: Enable for gfx90a after compiler fix
defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx1030__) || defined(__gfx1100__) || \ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \
defined(__gfx1101__) || defined(__gfx1102__)) defined(__gfx940__) || defined(__gfx1030__) || defined(__gfx1100__) || defined(__gfx1101__) || \
defined(__gfx1102__))
const index_t num_blocks_per_batch = const index_t num_blocks_per_batch =
__builtin_amdgcn_readfirstlane(get_grid_size() / batch_count); __builtin_amdgcn_readfirstlane(get_grid_size() / batch_count);
...@@ -648,10 +649,11 @@ struct DeviceBatchedGemmMultipleD_Dl : public DeviceBatchedGemmMultiD<ALayout, ...@@ -648,10 +649,11 @@ struct DeviceBatchedGemmMultipleD_Dl : public DeviceBatchedGemmMultiD<ALayout,
static bool IsSupportedArgument(const Argument& arg) static bool IsSupportedArgument(const Argument& arg)
{ {
// TODO: Enable for gfx90a after compiler fix
if(ck::get_device_name() == "gfx906" || ck::get_device_name() == "gfx908" || if(ck::get_device_name() == "gfx906" || ck::get_device_name() == "gfx908" ||
ck::get_device_name() == "gfx90a" || ck::get_device_name() == "gfx1030" || ck::get_device_name() == "gfx1030" || ck::get_device_name() == "gfx940" ||
ck::get_device_name() == "gfx940" || ck::get_device_name() == "gfx1100" || ck::get_device_name() == "gfx1100" || ck::get_device_name() == "gfx1101" ||
ck::get_device_name() == "gfx1101" || ck::get_device_name() == "gfx1102") ck::get_device_name() == "gfx1102")
{ {
bool pass = true; bool pass = true;
pass = pass && arg.K_ % K1 == 0; pass = pass && arg.K_ % K1 == 0;
......
...@@ -141,7 +141,7 @@ bool profile_batched_gemm_impl(int do_verification, ...@@ -141,7 +141,7 @@ bool profile_batched_gemm_impl(int do_verification,
for(auto& op_ptr : op_ptrs) for(auto& op_ptr : op_ptrs)
{ {
std::unique_ptr<tensor_operation::device::BaseArgument> argument_ptr; std::unique_ptr<tensor_operation::device::BaseArgument> argument_ptr;
// true branch for multi d dl kernel // false branch for multi d dl kernel
if constexpr(std::is_same< if constexpr(std::is_same<
DeviceOp, DeviceOp,
ck::tensor_operation::device::DeviceBatchedGemm<ALayout, ck::tensor_operation::device::DeviceBatchedGemm<ALayout,
......
add_gtest_executable(test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp) # TODO: Enable for gfx90a after compiler fix
target_link_libraries(test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance) if(NOT GPU_TARGETS MATCHES "gfx90a")
add_gtest_executable(test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp)
target_link_libraries(test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance)
endif()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment