Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f65bb1a1
Commit
f65bb1a1
authored
Jun 11, 2023
by
Bartlomiej Kocot
Browse files
Disable tests for gfx90a
parent
0f48e38a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
14 additions
and
9 deletions
+14
-9
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
...ion/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
+8
-6
profiler/include/profiler/profile_batched_gemm_impl.hpp
profiler/include/profiler/profile_batched_gemm_impl.hpp
+1
-1
test/batched_gemm_multi_d/CMakeLists.txt
test/batched_gemm_multi_d/CMakeLists.txt
+5
-2
No files found.
include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp
View file @
f65bb1a1
...
@@ -70,9 +70,10 @@ __global__ void
...
@@ -70,9 +70,10 @@ __global__ void
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
ComputePtrOffsetOfBatch
compute_ptr_offset_of_batch
,
const
Block2CTileMap
block_2_ctile_map
)
const
Block2CTileMap
block_2_ctile_map
)
{
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \
// TODO: Enable for gfx90a after complier fix
defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx1030__) || defined(__gfx1100__) || \
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx906__) || defined(__gfx908__) || \
defined(__gfx1101__) || defined(__gfx1102__))
defined(__gfx940__) || defined(__gfx1030__) || defined(__gfx1100__) || defined(__gfx1101__) || \
defined(__gfx1102__))
const
index_t
num_blocks_per_batch
=
const
index_t
num_blocks_per_batch
=
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
batch_count
);
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
batch_count
);
...
@@ -648,10 +649,11 @@ struct DeviceBatchedGemmMultipleD_Dl : public DeviceBatchedGemmMultiD<ALayout,
...
@@ -648,10 +649,11 @@ struct DeviceBatchedGemmMultipleD_Dl : public DeviceBatchedGemmMultiD<ALayout,
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
static
bool
IsSupportedArgument
(
const
Argument
&
arg
)
{
{
// TODO: Enable for gfx90a after complier fix
if
(
ck
::
get_device_name
()
==
"gfx906"
||
ck
::
get_device_name
()
==
"gfx908"
||
if
(
ck
::
get_device_name
()
==
"gfx906"
||
ck
::
get_device_name
()
==
"gfx908"
||
ck
::
get_device_name
()
==
"gfx
90a
"
||
ck
::
get_device_name
()
==
"gfx
103
0"
||
ck
::
get_device_name
()
==
"gfx
1030
"
||
ck
::
get_device_name
()
==
"gfx
94
0"
||
ck
::
get_device_name
()
==
"gfx
94
0"
||
ck
::
get_device_name
()
==
"gfx110
0
"
||
ck
::
get_device_name
()
==
"gfx
110
0"
||
ck
::
get_device_name
()
==
"gfx110
1
"
||
ck
::
get_device_name
()
==
"gfx1101"
||
ck
::
get_device_name
()
==
"gfx1102"
)
ck
::
get_device_name
()
==
"gfx1102"
)
{
{
bool
pass
=
true
;
bool
pass
=
true
;
pass
=
pass
&&
arg
.
K_
%
K1
==
0
;
pass
=
pass
&&
arg
.
K_
%
K1
==
0
;
...
...
profiler/include/profiler/profile_batched_gemm_impl.hpp
View file @
f65bb1a1
...
@@ -141,7 +141,7 @@ bool profile_batched_gemm_impl(int do_verification,
...
@@ -141,7 +141,7 @@ bool profile_batched_gemm_impl(int do_verification,
for
(
auto
&
op_ptr
:
op_ptrs
)
for
(
auto
&
op_ptr
:
op_ptrs
)
{
{
std
::
unique_ptr
<
tensor_operation
::
device
::
BaseArgument
>
argument_ptr
;
std
::
unique_ptr
<
tensor_operation
::
device
::
BaseArgument
>
argument_ptr
;
//
tru
e branch for multi d dl kernel
//
fals
e branch for multi d dl kernel
if
constexpr
(
std
::
is_same
<
if
constexpr
(
std
::
is_same
<
DeviceOp
,
DeviceOp
,
ck
::
tensor_operation
::
device
::
DeviceBatchedGemm
<
ALayout
,
ck
::
tensor_operation
::
device
::
DeviceBatchedGemm
<
ALayout
,
...
...
test/batched_gemm_multi_d/CMakeLists.txt
View file @
f65bb1a1
add_gtest_executable
(
test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp
)
# TODO: Enable for gfx90a after complier fix
target_link_libraries
(
test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance
)
if
(
NOT GPU_TARGETS MATCHES
"gfx90a"
)
add_gtest_executable
(
test_batched_gemm_multi_d test_batched_gemm_multi_d.cpp
)
target_link_libraries
(
test_batched_gemm_multi_d PRIVATE utility device_batched_gemm_multi_d_instance
)
endif
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment