Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
a5137505
Unverified
Commit
a5137505
authored
Jan 06, 2025
by
arai713
Committed by
GitHub
Jan 06, 2025
Browse files
Merge branch 'develop' into codegen_hiprtc
parents
208a1dab
888317e6
Changes
259
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
790 additions
and
137 deletions
+790
-137
script/process_qa_data.sh
script/process_qa_data.sh
+12
-0
script/run_full_performance_tests.sh
script/run_full_performance_tests.sh
+1
-1
script/run_gemm_performance_tests.sh
script/run_gemm_performance_tests.sh
+41
-0
script/run_performance_tests.sh
script/run_performance_tests.sh
+6
-15
test/ck_tile/CMakeLists.txt
test/ck_tile/CMakeLists.txt
+1
-0
test/ck_tile/batched_gemm/test_batched_gemm_util.hpp
test/ck_tile/batched_gemm/test_batched_gemm_util.hpp
+21
-22
test/ck_tile/gemm/CMakeLists.txt
test/ck_tile/gemm/CMakeLists.txt
+1
-1
test/ck_tile/gemm/test_gemm_pipeline.cpp
test/ck_tile/gemm/test_gemm_pipeline.cpp
+42
-0
test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc
test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc
+57
-0
test/ck_tile/gemm/test_gemm_pipeline_util.hpp
test/ck_tile/gemm/test_gemm_pipeline_util.hpp
+69
-59
test/ck_tile/grouped_gemm/CMakeLists.txt
test/ck_tile/grouped_gemm/CMakeLists.txt
+4
-0
test/ck_tile/grouped_gemm/test_grouped_gemm.cpp
test/ck_tile/grouped_gemm/test_grouped_gemm.cpp
+29
-0
test/ck_tile/grouped_gemm/test_grouped_gemm_ut_cases.inc
test/ck_tile/grouped_gemm/test_grouped_gemm_ut_cases.inc
+25
-0
test/ck_tile/grouped_gemm/test_grouped_gemm_util.hpp
test/ck_tile/grouped_gemm/test_grouped_gemm_util.hpp
+282
-0
test/data_type/test_custom_type.cpp
test/data_type/test_custom_type.cpp
+59
-23
test/grouped_convnd_bwd_data/CMakeLists.txt
test/grouped_convnd_bwd_data/CMakeLists.txt
+6
-2
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_wmma.cpp
...ped_convnd_bwd_data/test_grouped_convnd_bwd_data_wmma.cpp
+108
-0
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl.cpp
...uped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl.cpp
+25
-14
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
+1
-0
No files found.
script/process_qa_data.sh
View file @
a5137505
...
...
@@ -24,6 +24,18 @@ python3 process_perf_data.py perf_splitK_gemm.log
python3 process_perf_data.py perf_onnx_gemm.log
python3 process_perf_data.py perf_mixed_gemm.log
file
=
./perf_onnx_gemm_gfx10.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx10.log
fi
file
=
./perf_onnx_gemm_gfx11.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx11.log
fi
file
=
./perf_onnx_gemm_gfx12.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx12.log
fi
file
=
./perf_fmha_fwd_gfx942.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
...
...
script/run_full_performance_tests.sh
View file @
a5137505
...
...
@@ -5,7 +5,7 @@
# post your new test results to the database and compare them to the baseline
# please contact Illia.Silin@amd.com for more details
#
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <
node name>
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>
# input arguments:
# verification = 0 : do not verify result correctness on CPU
# = 1 : verifuy correctness on CPU (may take a long time)
...
...
script/run_gemm_performance_tests.sh
0 → 100755
View file @
a5137505
#!/bin/bash
#
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# run the script as "./run_gemm_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name> <arch>
# input arguments:
# verification = 0 : do not verify result correctness on CPU
# = 1 : verify correctness on CPU (may take a long time)
# environment tag : a string describing the specifics of your test environment
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
# node name : $hostname
# arch : GPU architecture, e.g. "gfx9" or "gfx1100"
#get the command line arguments:
export
verify
=
$1
echo
'Verification: '
$verify
export
env_type
=
$2
echo
'Environment type: '
$env_type
export
branch
=
$3
echo
'Branch name: '
$branch
export
host_name
=
$4
echo
'Host name: '
$host_name
export arch
=
$5
echo
'GPU architecture: '
$arch
function
print_log_header
(){
rm
-f
$1
;
echo
'On branch '
$3
&>
$1
;
echo
'Node name: '
$4
>>
$1
;
#get GPU_arch and number of compute units from rocminfo
echo
-n
"GPU_arch: "
>>
$1
;
rocminfo |
grep
"Name:"
|
grep
"gfx"
>>
$1
;
rocminfo |
grep
"Compute Unit:"
>>
$1
;
hipcc
--version
|
grep
-e
'HIP version'
>>
$1
;
echo
'Environment type: '
$2
>>
$1
;
/opt/rocm/bin/amdclang++
--version
|
grep
-e
'InstalledDir'
>>
$1
;
}
#run ONNX gemm tests
export
onnx_log
=
"perf_onnx_gemm_
$arch
.log"
print_log_header
$onnx_log
$env_type
$branch
$host_name
./profile_onnx_gemm.sh gemm 0 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
./profile_onnx_gemm.sh gemm 1 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
script/run_performance_tests.sh
View file @
a5137505
#!/bin/bash
#
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <
node name>
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>
# input arguments:
# verification = 0 : do not verify result correctness on CPU
# = 1 : verify correctness on CPU (may take a long time)
...
...
@@ -51,20 +51,11 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 2 3
$verify
1 0 1 |
tee
-a
$gemm_log
./profile_gemm.sh gemm 3 3
$verify
1 0 1 |
tee
-a
$gemm_log
#run grouped_fwd fp16 tests
export
grouped_conv_fwd_log
=
"perf_grouped_conv_fwd_fp16.log"
print_log_header
$conv_fwd_log
$env_type
$branch
$host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_fwd_log
#run grouped_bwd_data fp16 tests
export
grouped_conv_bwd_data_log
=
"perf_grouped_conv_bwd_data_fp16.log"
print_log_header
$grouped_conv_bwd_data_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_bwd_data_log
#run grouped_bwd_weight fp16 tests
export
grouped_conv_bwd_weight_log
=
"perf_grouped_conv_bwd_weight_fp16.log"
print_log_header
$grouped_conv_bwd_weight_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1
$verify
1 0 1 256 1 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
#run ONNX gemm tests
export
onnx_log
=
"perf_onnx_gemm.log"
print_log_header
$onnx_log
$env_type
$branch
$host_name
./profile_onnx_gemm.sh gemm 0 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
./profile_onnx_gemm.sh gemm 1 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
#run resnet50 tests
export
resnet256_log
=
"perf_resnet50_N256.log"
...
...
test/ck_tile/CMakeLists.txt
View file @
a5137505
add_subdirectory
(
image_to_column
)
add_subdirectory
(
gemm
)
add_subdirectory
(
batched_gemm
)
add_subdirectory
(
grouped_gemm
)
test/ck_tile/batched_gemm/test_batched_gemm_util.hpp
View file @
a5137505
...
...
@@ -24,12 +24,9 @@ class TestCkTileBatchedGemm : public ::testing::Test
using
AccDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
CDataType
=
std
::
tuple_element_t
<
6
,
Tuple
>
;
struct
batched_gemm_kargs
:
public
ck_tile
::
BatchedGemmHostArgs
{
};
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
void
invoke_batched_gemm
(
const
batched_gemm_kargs
&
args
,
const
ck_tile
::
stream_config
&
s
)
void
invoke_batched_gemm
(
const
ck_tile
::
BatchedGemmHostArgs
&
args
,
const
ck_tile
::
stream_config
&
s
)
{
// The kPadM, kPadN, kPadK & kBlockPerCu should also come from the Codegen part.
constexpr
bool
kPadM
=
false
;
...
...
@@ -94,9 +91,9 @@ class TestCkTileBatchedGemm : public ::testing::Test
using
Kernel
=
ck_tile
::
BatchedGemmKernel
<
TilePartitioner
,
CodegenGemmPipeline
,
GemmEpilogue
>
;
auto
kargs
=
Kernel
::
MakeK
a
rgs
(
args
);
auto
kargs
=
Kernel
::
MakeK
ernelA
rgs
(
args
);
const
dim3
grids
=
Kernel
::
GridSize
(
args
);
const
dim3
grids
=
Kernel
::
GridSize
(
args
.
M
,
args
.
N
,
args
.
k_batch
,
args
.
batch_count
);
constexpr
dim3
blocks
=
Kernel
::
BlockSize
();
if
(
s
.
log_level_
>
0
)
...
...
@@ -185,21 +182,23 @@ class TestCkTileBatchedGemm : public ::testing::Test
c_m_n_dev_buf
.
SetZero
();
c_m_n_dev_result
.
SetZero
();
batched_gemm_kargs
kargs
{
a_m_k_dev_buf
.
GetDeviceBuffer
(),
b_k_n_dev_buf
.
GetDeviceBuffer
(),
c_m_n_dev_buf
.
GetDeviceBuffer
(),
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
,
BatchStrideA
,
BatchStrideB
,
BatchStrideC
,
BatchCount
};
invoke_batched_gemm
<
ALayout
,
BLayout
,
CLayout
>
(
kargs
,
ck_tile
::
BatchedGemmHostArgs
args
;
args
.
a_ptr
=
a_m_k_dev_buf
.
GetDeviceBuffer
();
args
.
b_ptr
=
b_k_n_dev_buf
.
GetDeviceBuffer
();
args
.
c_ptr
=
c_m_n_dev_buf
.
GetDeviceBuffer
();
args
.
k_batch
=
1
;
args
.
M
=
M
;
args
.
N
=
N
;
args
.
K
=
K
;
args
.
stride_A
=
StrideA
;
args
.
stride_B
=
StrideB
;
args
.
stride_C
=
StrideC
;
args
.
batch_stride_A
=
BatchStrideA
;
args
.
batch_stride_B
=
BatchStrideB
;
args
.
batch_stride_C
=
BatchStrideC
;
args
.
batch_count
=
BatchCount
;
invoke_batched_gemm
<
ALayout
,
BLayout
,
CLayout
>
(
args
,
ck_tile
::
stream_config
{
nullptr
,
false
});
std
::
cout
<<
"Run kernel with M ="
<<
M
<<
" N ="
<<
N
<<
" K ="
<<
K
...
...
test/ck_tile/gemm/CMakeLists.txt
View file @
a5137505
# Currently ck_tile is only built on gfx9
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_gtest_executable
(
test_ck_tile_gemm_
mem_
pipeline test_gemm_
mem_
pipeline.cpp
)
add_gtest_executable
(
test_ck_tile_gemm_pipeline test_gemm_pipeline.cpp
)
endif
()
test/ck_tile/gemm/test_gemm_pipeline.cpp
0 → 100644
View file @
a5137505
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include "gtest/gtest.h"
#include "ck_tile/host.hpp"
#include "test_gemm_pipeline_util.hpp"
using
F16
=
ck_tile
::
half_t
;
using
F32
=
float
;
using
Row
=
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck_tile
::
tensor_layout
::
gemm
::
ColumnMajor
;
using
Intrawave
=
ck_tile
::
integral_constant
<
ck_tile
::
GemmPipelineScheduler
,
ck_tile
::
GemmPipelineScheduler
::
Intrawave
>
;
using
Interwave
=
ck_tile
::
integral_constant
<
ck_tile
::
GemmPipelineScheduler
,
ck_tile
::
GemmPipelineScheduler
::
Interwave
>
;
using
Mem
=
ck_tile
::
integral_constant
<
GemmPipelineType
,
GemmPipelineType
::
Mem
>
;
using
Comp
=
ck_tile
::
integral_constant
<
GemmPipelineType
,
GemmPipelineType
::
Comp
>
;
// clang-format off
using
KernelTypes
=
::
testing
::
Types
<
// ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType, GemmPipelineScheduler, PipelineType
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Mem
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Comp
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
,
Interwave
,
Mem
>
,
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Mem
>
,
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Comp
>
,
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
,
Interwave
,
Mem
>
,
std
::
tuple
<
Col
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Mem
>
,
std
::
tuple
<
Col
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Comp
>
,
std
::
tuple
<
Col
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
,
Interwave
,
Mem
>
,
std
::
tuple
<
Col
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Mem
>
,
std
::
tuple
<
Col
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
,
Intrawave
,
Comp
>
,
std
::
tuple
<
Col
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
,
Interwave
,
Mem
>
>
;
// clang-format on
TYPED_TEST_SUITE
(
TestCkTileGemmPipeline
,
KernelTypes
);
#include "test_gemm_pipeline_ut_cases.inc"
test/ck_tile/gemm/test_gemm_
mem_
pipeline_ut_cases.inc
→
test/ck_tile/gemm/test_gemm_pipeline_ut_cases.inc
View file @
a5137505
...
...
@@ -3,11 +3,7 @@
#pragma once
//------------------------------------------------------------------------------------------------
// INTERWAVE SCHEDULER
//------------------------------------------------------------------------------------------------
TYPED_TEST
(
TestCkTileGemmMemPipelineInterwave
,
SmallM
)
TYPED_TEST
(
TestCkTileGemmPipeline
,
SmallM
)
{
std
::
vector
<
int
>
Ms
{
1
,
2
,
3
,
4
,
5
,
6
};
constexpr
int
N
=
1024
;
...
...
@@ -17,7 +13,7 @@ TYPED_TEST(TestCkTileGemmMemPipelineInterwave, SmallM)
this
->
Run
(
M
,
N
,
K
);
}
TYPED_TEST
(
TestCkTileGemm
Mem
Pipeline
Interwave
,
MidLargeM
)
TYPED_TEST
(
TestCkTileGemmPipeline
,
MidLargeM
)
{
std
::
vector
<
int
>
Ms
{
127
,
255
,
312
,
799
,
1573
};
constexpr
int
N
=
1024
;
...
...
@@ -27,7 +23,7 @@ TYPED_TEST(TestCkTileGemmMemPipelineInterwave, MidLargeM)
this
->
Run
(
M
,
N
,
K
);
}
TYPED_TEST
(
TestCkTileGemm
Mem
Pipeline
Interwave
,
PaddK
)
TYPED_TEST
(
TestCkTileGemmPipeline
,
PaddK
)
{
std
::
vector
<
int
>
Ms
{
127
};
constexpr
int
N
=
1024
;
...
...
@@ -37,7 +33,7 @@ TYPED_TEST(TestCkTileGemmMemPipelineInterwave, PaddK)
this
->
Run
(
M
,
N
,
K
);
}
TYPED_TEST
(
TestCkTileGemm
Mem
Pipeline
Interwave
,
Regular
)
TYPED_TEST
(
TestCkTileGemmPipeline
,
Regular
)
{
std
::
vector
<
int
>
Ms
{
512
};
constexpr
int
N
=
1024
;
...
...
@@ -47,46 +43,15 @@ TYPED_TEST(TestCkTileGemmMemPipelineInterwave, Regular)
this
->
Run
(
M
,
N
,
K
);
}
//------------------------------------------------------------------------------------------------
// INTRAWAVE SCHEDULER
//------------------------------------------------------------------------------------------------
TYPED_TEST
(
TestCkTileGemmMemPipelineIntrawave
,
SmallM
)
TYPED_TEST
(
TestCkTileGemmPipeline
,
NotSupportedArgument
)
{
std
::
vector
<
int
>
Ms
{
1
,
2
,
3
,
4
,
5
,
6
};
constexpr
int
N
=
1024
;
constexpr
int
K
=
320
;
for
(
int
M
:
Ms
)
this
->
Run
(
M
,
N
,
K
);
}
constexpr
int
M
=
512
;
constexpr
int
N
=
1025
;
constexpr
int
K
=
513
;
TYPED_TEST
(
TestCkTileGemmMemPipelineIntrawave
,
MidLargeM
)
{
std
::
vector
<
int
>
Ms
{
127
,
255
,
312
,
799
,
1573
};
constexpr
int
N
=
1024
;
constexpr
int
K
=
320
;
for
(
int
M
:
Ms
)
this
->
Run
(
M
,
N
,
K
);
}
constexpr
bool
PadM
=
false
;
constexpr
bool
PadN
=
false
;
constexpr
bool
PadK
=
false
;
TYPED_TEST
(
TestCkTileGemmMemPipelineIntrawave
,
PaddK
)
{
std
::
vector
<
int
>
Ms
{
127
};
constexpr
int
N
=
1024
;
constexpr
int
K
=
432
;
for
(
int
M
:
Ms
)
this
->
Run
(
M
,
N
,
K
);
}
TYPED_TEST
(
TestCkTileGemmMemPipelineIntrawave
,
Regular
)
{
std
::
vector
<
int
>
Ms
{
512
};
constexpr
int
N
=
1024
;
constexpr
int
K
=
512
;
for
(
int
M
:
Ms
)
this
->
Run
(
M
,
N
,
K
);
EXPECT_THROW
((
this
->
template
Run
<
PadM
,
PadN
,
PadK
>
(
M
,
N
,
K
)),
std
::
runtime_error
);
}
test/ck_tile/gemm/test_gemm_
mem_
pipeline_util.hpp
→
test/ck_tile/gemm/test_gemm_pipeline_util.hpp
View file @
a5137505
...
...
@@ -11,35 +11,28 @@
#include "ck_tile/ops/epilogue.hpp"
#include "ck_tile/ops/gemm.hpp"
template
<
typename
Tuple
,
ck_tile
::
GemmPipelineScheduler
Scheduler_
>
class
TestCkTileGemmMemPipeline
:
public
::
testing
::
Test
enum
struct
GemmPipelineType
{
Mem
,
Comp
};
template
<
typename
Tuple
>
class
TestCkTileGemmPipeline
:
public
::
testing
::
Test
{
protected:
using
ALayout
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
BLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
CLayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
ADataType
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
using
BDataType
=
std
::
tuple_element_t
<
4
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
CDataType
=
std
::
tuple_element_t
<
6
,
Tuple
>
;
static
constexpr
auto
Scheduler
=
Scheduler_
;
using
ALayout
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
BLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
CLayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
ADataType
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
using
BDataType
=
std
::
tuple_element_t
<
4
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
CDataType
=
std
::
tuple_element_t
<
6
,
Tuple
>
;
static
constexpr
auto
Scheduler
=
std
::
tuple_element_t
<
7
,
Tuple
>::
value
;
static
constexpr
auto
PipelineType
=
std
::
tuple_element_t
<
8
,
Tuple
>::
value
;
// TODO: expose tile size through test t-param ?
struct
gemm_args
{
const
void
*
p_a
;
const
void
*
p_b
;
void
*
p_c
;
ck_tile
::
index_t
kbatch
;
ck_tile
::
index_t
M
;
ck_tile
::
index_t
N
;
ck_tile
::
index_t
K
;
ck_tile
::
index_t
stride_A
;
ck_tile
::
index_t
stride_B
;
ck_tile
::
index_t
stride_C
;
};
void
invoke_gemm
(
const
gemm_args
&
args
,
const
ck_tile
::
stream_config
&
s
)
template
<
bool
PadM
,
bool
PadN
,
bool
PadK
>
void
invoke_gemm
(
const
ck_tile
::
GemmHostArgs
&
args
,
const
ck_tile
::
stream_config
&
s
)
{
// TODO: This should be parameterized in tests
constexpr
ck_tile
::
index_t
M_Tile
=
128
;
...
...
@@ -54,9 +47,9 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
constexpr
ck_tile
::
index_t
N_Warp_Tile
=
32
;
constexpr
ck_tile
::
index_t
K_Warp_Tile
=
8
;
constexpr
bool
kPadM
=
true
;
constexpr
bool
kPadN
=
true
;
constexpr
bool
kPadK
=
true
;
constexpr
bool
kPadM
=
PadM
;
constexpr
bool
kPadN
=
PadN
;
constexpr
bool
kPadK
=
PadK
;
constexpr
int
kBlockPerCu
=
1
;
...
...
@@ -73,10 +66,17 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
using
Traits
=
ck_tile
::
TileGemmTraits
<
kPadM
,
kPadN
,
kPadK
,
ALayout
,
BLayout
,
CLayout
>
;
using
BaseGemmPipeline
=
ck_tile
::
BaseGemmPipelineAgBgCrMem
<
ck_tile
::
GemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
GemmShape
,
Traits
>>
;
const
ck_tile
::
index_t
num_loop
=
TilePartitioner
::
GetLoopNum
(
args
.
K
);
using
BaseGemmPipeline
=
std
::
conditional_t
<
PipelineType
==
GemmPipelineType
::
Mem
,
ck_tile
::
BaseGemmPipelineAgBgCrMem
<
ck_tile
::
GemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
GemmShape
,
Traits
>>
,
ck_tile
::
BaseGemmPipelineAgBgCrCompV3
<
ck_tile
::
GemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
GemmShape
,
Traits
>>>
;
const
ck_tile
::
index_t
k_grain
=
args
.
k_batch
*
K_Tile
;
const
ck_tile
::
index_t
K_split
=
(
args
.
K
+
k_grain
-
1
)
/
k_grain
*
K_Tile
;
const
ck_tile
::
index_t
num_loop
=
TilePartitioner
::
GetLoopNum
(
K_split
);
const
bool
has_hot_loop
=
BaseGemmPipeline
::
BlockHasHotloop
(
num_loop
);
const
ck_tile
::
TailNumber
tail_num
=
BaseGemmPipeline
::
GetBlockLoopTailNum
(
num_loop
);
...
...
@@ -84,29 +84,37 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
constexpr
bool
has_hot_loop_v
=
has_hot_loop_
.
value
;
constexpr
auto
tail_number_v
=
tail_number_
.
value
;
using
GemmPipeline
=
ck_tile
::
GemmPipelineAgBgCrMem
<
ck_tile
::
UniversalGemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
GemmShape
,
Traits
,
Scheduler
,
has_hot_loop_v
,
tail_number_v
>>
;
using
GemmPipeline
=
std
::
conditional_t
<
PipelineType
==
GemmPipelineType
::
Mem
,
ck_tile
::
GemmPipelineAgBgCrMem
<
ck_tile
::
UniversalGemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
GemmShape
,
Traits
,
Scheduler
,
has_hot_loop_v
,
tail_number_v
>>
,
ck_tile
::
GemmPipelineAgBgCrCompV3
<
ck_tile
::
UniversalGemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
GemmShape
,
Traits
,
Scheduler
,
has_hot_loop_v
,
tail_number_v
>>>
;
using
Kernel
=
ck_tile
::
GemmKernel
<
TilePartitioner
,
GemmPipeline
,
GemmEpilogue
>
;
auto
kargs
=
Kernel
::
MakeKargs
(
args
.
p_a
,
args
.
p_b
,
args
.
p_c
,
args
.
M
,
args
.
N
,
args
.
K
,
args
.
stride_A
,
args
.
stride_B
,
args
.
stride_C
);
const
dim3
grids
=
Kernel
::
GridSize
(
args
.
M
,
args
.
N
,
args
.
kbatch
);
auto
kargs
=
Kernel
::
MakeKernelArgs
(
args
);
const
dim3
grids
=
Kernel
::
GridSize
(
args
.
M
,
args
.
N
,
args
.
k_batch
);
constexpr
dim3
blocks
=
Kernel
::
BlockSize
();
if
(
!
Kernel
::
IsSupportedArgument
(
kargs
))
{
throw
std
::
runtime_error
(
"Wrong! Arguments not supported! Skipping gemm!
\n
"
);
}
if
(
s
.
log_level_
>
0
)
{
std
::
cout
<<
"Launching kernel with args:"
...
...
@@ -212,6 +220,7 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
void
SetUp
()
override
{
k_batches_
=
{
1
};
}
template
<
bool
PadM
=
true
,
bool
PadN
=
true
,
bool
PadK
=
true
>
void
Run
(
const
int
M
,
const
int
N
,
const
int
K
,
...
...
@@ -221,10 +230,11 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
{
for
(
auto
kb
:
k_batches_
)
{
RunSingle
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
,
kb
);
RunSingle
<
PadM
,
PadN
,
PadK
>
(
M
,
N
,
K
,
StrideA
,
StrideB
,
StrideC
,
kb
);
}
}
template
<
bool
PadM
,
bool
PadN
,
bool
PadK
>
void
RunSingle
(
const
int
M
,
const
int
N
,
const
int
K
,
...
...
@@ -289,11 +299,11 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
c_m_n_dev_buf
.
SetZero
();
c_m_n_dev_result
.
SetZero
();
gemm_a
rgs
args
;
args
.
p_a
=
a_m_k_dev_buf
.
GetDeviceBuffer
();
args
.
p_b
=
b_k_n_dev_buf
.
GetDeviceBuffer
();
args
.
p_c
=
c_m_n_dev_buf
.
GetDeviceBuffer
();
args
.
kbatch
=
kbatch
;
ck_tile
::
GemmHostA
rgs
args
;
args
.
a_ptr
=
a_m_k_dev_buf
.
GetDeviceBuffer
();
args
.
b_ptr
=
b_k_n_dev_buf
.
GetDeviceBuffer
();
args
.
c_ptr
=
c_m_n_dev_buf
.
GetDeviceBuffer
();
args
.
k
_
batch
=
kbatch
;
args
.
M
=
M
;
args
.
N
=
N
;
args
.
K
=
K
;
...
...
@@ -301,7 +311,7 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
args
.
stride_B
=
stride_B
;
args
.
stride_C
=
stride_C
;
invoke_gemm
(
args
,
ck_tile
::
stream_config
{
nullptr
,
false
});
invoke_gemm
<
PadM
,
PadN
,
PadK
>
(
args
,
ck_tile
::
stream_config
{
nullptr
,
false
});
c_m_n_dev_buf
.
FromDevice
(
c_m_n_dev_result
.
data
());
bool
pass
=
true
;
...
...
test/ck_tile/grouped_gemm/CMakeLists.txt
0 → 100644
View file @
a5137505
# Currently ck_tile is only built on gfx9
if
(
GPU_TARGETS MATCHES
"gfx9"
)
add_gtest_executable
(
test_ck_tile_grouped_gemm test_grouped_gemm.cpp
)
endif
()
test/ck_tile/gemm/test_g
emm_mem_pipeline
.cpp
→
test/ck_tile/
grouped_
gemm/test_g
rouped_gemm
.cpp
View file @
a5137505
...
...
@@ -6,37 +6,24 @@
#include "gtest/gtest.h"
#include "ck_tile/host.hpp"
#include "test_g
emm_mem_pipeline
_util.hpp"
#include "test_g
rouped_gemm
_util.hpp"
using
F16
=
ck_tile
::
half_t
;
using
F32
=
float
;
using
Row
=
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck_tile
::
tensor_layout
::
gemm
::
ColumnMajor
;
static
constexpr
auto
Intrawave
=
ck_tile
::
GemmPipelineScheduler
::
Intrawave
;
static
constexpr
auto
Interwave
=
ck_tile
::
GemmPipelineScheduler
::
Interwave
;
template
<
typename
Tuple
>
class
TestCkTileGemmMemPipelineIntrawave
:
public
TestCkTileGemmMemPipeline
<
Tuple
,
Intrawave
>
{
};
template
<
typename
Tuple
>
class
TestCkTileGemmMemPipelineInterwave
:
public
TestCkTileGemmMemPipeline
<
Tuple
,
Interwave
>
{
};
using
Row
=
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
;
using
Col
=
ck_tile
::
tensor_layout
::
gemm
::
ColumnMajor
;
// clang-format off
using
KernelTypes
=
::
testing
::
Types
<
// ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
>
,
std
::
tuple
<
Col
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
>
,
std
::
tuple
<
Row
,
Row
,
Row
,
F16
,
F16
,
F32
,
F16
>
,
std
::
tuple
<
Col
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
>
//std::tuple< Col, Row, Row, F16, F16, F32, F16>,
std
::
tuple
<
Row
,
Col
,
Row
,
F16
,
F16
,
F32
,
F16
>
//,
//std::tuple< Col, Col, Row, F16, F16, F32, F16>
>
;
// clang-format on
TYPED_TEST_SUITE
(
TestCkTileGemmMemPipelineIntrawave
,
KernelTypes
);
TYPED_TEST_SUITE
(
TestCkTileGemmMemPipelineInterwave
,
KernelTypes
);
TYPED_TEST_SUITE
(
TestCkTileGroupedGemm
,
KernelTypes
);
#include "test_g
emm_mem_pipeline
_ut_cases.inc"
#include "test_g
rouped_gemm
_ut_cases.inc"
test/ck_tile/grouped_gemm/test_grouped_gemm_ut_cases.inc
0 → 100644
View file @
a5137505
#pragma once
TYPED_TEST
(
TestCkTileGroupedGemm
,
Basic
)
{
const
int
group_count
=
16
;
std
::
vector
<
int
>
Ms
;
std
::
vector
<
int
>
Ns
;
std
::
vector
<
int
>
Ks
;
std
::
vector
<
int
>
stride_As
;
std
::
vector
<
int
>
stride_Bs
;
std
::
vector
<
int
>
stride_Cs
;
for
(
int
i
=
0
;
i
<
group_count
;
i
++
)
{
Ms
.
push_back
(
256
+
256
*
i
);
Ns
.
push_back
(
128
+
128
*
i
);
Ks
.
push_back
(
128
+
64
*
i
);
stride_As
.
push_back
(
Ks
[
i
]);
stride_Bs
.
push_back
(
Ks
[
i
]);
stride_Cs
.
push_back
(
Ns
[
i
]);
}
this
->
Run
(
Ms
,
Ns
,
Ks
,
stride_As
,
stride_Bs
,
stride_Cs
,
group_count
);
}
test/ck_tile/grouped_gemm/test_grouped_gemm_util.hpp
0 → 100644
View file @
a5137505
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <sstream>
#include <gtest/gtest.h>
#include "ck_tile/core.hpp"
#include "ck_tile/host.hpp"
#include "ck_tile/host/kernel_launch.hpp"
#include "ck_tile/ops/epilogue.hpp"
#include "ck_tile/ops/gemm.hpp"
#include "ck_tile/ops/gemm/kernel/grouped_gemm_kernel.hpp"
template
<
typename
Tuple
>
class
TestCkTileGroupedGemm
:
public
::
testing
::
Test
{
protected:
using
ALayout
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
BLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
CLayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
ADataType
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
using
BDataType
=
std
::
tuple_element_t
<
4
,
Tuple
>
;
using
AccDataType
=
std
::
tuple_element_t
<
5
,
Tuple
>
;
using
CDataType
=
std
::
tuple_element_t
<
6
,
Tuple
>
;
struct
GroupedGemKernelParam
{
static
const
bool
kPadM
=
false
;
static
const
bool
kPadN
=
false
;
static
const
bool
kPadK
=
false
;
static
const
bool
kTilePermute
=
false
;
static
const
ck_tile
::
index_t
kOutputRank
=
2
;
static
const
int
kBlockPerCu
=
1
;
static
const
ck_tile
::
index_t
M_Tile
=
128
;
static
const
ck_tile
::
index_t
N_Tile
=
128
;
static
const
ck_tile
::
index_t
K_Tile
=
32
;
static
const
ck_tile
::
index_t
M_Warp
=
2
;
static
const
ck_tile
::
index_t
N_Warp
=
2
;
static
const
ck_tile
::
index_t
K_Warp
=
1
;
static
const
ck_tile
::
index_t
M_Warp_Tile
=
32
;
static
const
ck_tile
::
index_t
N_Warp_Tile
=
32
;
static
const
ck_tile
::
index_t
K_Warp_Tile
=
8
;
};
using
CodegenGemmShape
=
ck_tile
::
TileGemmShape
<
ck_tile
::
sequence
<
GroupedGemKernelParam
::
M_Tile
,
GroupedGemKernelParam
::
N_Tile
,
GroupedGemKernelParam
::
K_Tile
>
,
ck_tile
::
sequence
<
GroupedGemKernelParam
::
M_Warp
,
GroupedGemKernelParam
::
N_Warp
,
GroupedGemKernelParam
::
K_Warp
>
,
ck_tile
::
sequence
<
GroupedGemKernelParam
::
M_Warp_Tile
,
GroupedGemKernelParam
::
N_Warp_Tile
,
GroupedGemKernelParam
::
K_Warp_Tile
>>
;
using
TilePartitioner
=
ck_tile
::
GemmTile1DPartitioner
<
CodegenGemmShape
>
;
template
<
typename
CLayout
>
using
GemmEpilogue
=
std
::
conditional_t
<
std
::
is_same_v
<
CLayout
,
ck_tile
::
tensor_layout
::
gemm
::
ColumnMajor
>
,
ck_tile
::
CShuffleEpilogue
<
ck_tile
::
CShuffleEpilogueProblem
<
AccDataType
,
CDataType
,
GroupedGemKernelParam
::
kPadM
,
GroupedGemKernelParam
::
kPadN
,
GroupedGemKernelParam
::
kTilePermute
,
GroupedGemKernelParam
::
kOutputRank
,
1
,
0
,
TilePartitioner
::
MPerBlock
,
TilePartitioner
::
NPerBlock
>>
,
ck_tile
::
Default2DEpilogue
<
ck_tile
::
Default2DEpilogueProblem
<
AccDataType
,
CDataType
,
GroupedGemKernelParam
::
kPadM
,
GroupedGemKernelParam
::
kPadN
>>>
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
CodegenGemmTraits
=
ck_tile
::
TileGemmTraits
<
GroupedGemKernelParam
::
kPadM
,
GroupedGemKernelParam
::
kPadN
,
GroupedGemKernelParam
::
kPadK
,
ALayout
,
BLayout
,
CLayout
>
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
CodegenPipelineProblem
=
ck_tile
::
GemmPipelineProblem
<
ADataType
,
BDataType
,
AccDataType
,
CodegenGemmShape
,
CodegenGemmTraits
<
ALayout
,
BLayout
,
CLayout
>>
;
using
CodegenGemmPolicy
=
ck_tile
::
UniversalGemmPipelineAgBgCrPolicy
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
CodegenGemmPipeline
=
ck_tile
::
GemmPipelineAGmemBGmemCRegV1
<
CodegenPipelineProblem
<
ALayout
,
BLayout
,
CLayout
>
,
CodegenGemmPolicy
>
;
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
using
Kernel
=
ck_tile
::
GroupedGemmKernel
<
TilePartitioner
,
CodegenGemmPipeline
<
ALayout
,
BLayout
,
CLayout
>
,
GemmEpilogue
<
CLayout
>>
;
using
grouped_gemm_kargs
=
ck_tile
::
GroupedGemmHostArgs
;
std
::
size_t
GetWorkspaceSize
(
const
std
::
vector
<
grouped_gemm_kargs
>&
gemm_descs
)
{
return
Kernel
<
std
::
nullptr_t
,
std
::
nullptr_t
,
std
::
nullptr_t
>::
GetWorkSpaceSize
(
gemm_descs
);
}
template
<
typename
ALayout
,
typename
BLayout
,
typename
CLayout
>
void
invoke_grouped_gemm
(
const
std
::
vector
<
grouped_gemm_kargs
>&
gemm_descs
,
const
ck_tile
::
stream_config
&
s
,
void
*
p_workspace_
)
{
using
GroupedGemmKernel
=
Kernel
<
ALayout
,
BLayout
,
CLayout
>
;
auto
arguments
=
GroupedGemmKernel
::
MakeKargs
(
gemm_descs
);
const
dim3
grids
=
GroupedGemmKernel
::
GridSize
(
gemm_descs
);
constexpr
dim3
blocks
=
GroupedGemmKernel
::
BlockSize
();
ck_tile
::
hip_check_error
(
hipMemcpyWithStream
(
p_workspace_
,
arguments
.
data
(),
arguments
.
size
()
*
sizeof
(
typename
GroupedGemmKernel
::
GemmTransKernelArg
),
hipMemcpyHostToDevice
,
s
.
stream_id_
));
if
(
s
.
log_level_
>
0
)
{
std
::
cout
<<
"Launching kernel with args:"
<<
" grid: {"
<<
grids
.
x
<<
", "
<<
grids
.
y
<<
", "
<<
grids
.
z
<<
"}"
<<
", blocks: {"
<<
blocks
.
x
<<
", "
<<
blocks
.
y
<<
", "
<<
blocks
.
z
<<
"}"
<<
std
::
endl
;
}
ck_tile
::
launch_kernel
(
s
,
ck_tile
::
make_kernel
<
blocks
.
x
,
GroupedGemKernelParam
::
kBlockPerCu
>
(
GroupedGemmKernel
{},
grids
,
blocks
,
0
,
ck_tile
::
cast_pointer_to_constant_address_space
(
p_workspace_
),
gemm_descs
.
size
()));
}
public:
void
Run
(
const
std
::
vector
<
int
>&
Ms
,
const
std
::
vector
<
int
>&
Ns
,
const
std
::
vector
<
int
>&
Ks
,
std
::
vector
<
int
>&
stride_As
,
std
::
vector
<
int
>&
stride_Bs
,
std
::
vector
<
int
>&
stride_Cs
,
const
int
group_count
=
16
)
{
using
namespace
ck_tile
::
literals
;
auto
f_host_tensor_descriptor
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
return
ck_tile
::
HostTensorDescriptor
({
row
,
col
},
{
stride
,
1
_uz
});
}
else
{
return
ck_tile
::
HostTensorDescriptor
({
row
,
col
},
{
1
_uz
,
stride
});
}
};
auto
f_get_default_stride
=
[](
std
::
size_t
row
,
std
::
size_t
col
,
std
::
size_t
stride
,
auto
layout
)
{
if
(
stride
==
0
)
{
if
constexpr
(
std
::
is_same_v
<
decltype
(
layout
),
ck_tile
::
tensor_layout
::
gemm
::
RowMajor
>
)
{
return
col
;
}
else
{
return
row
;
}
}
else
return
stride
;
};
std
::
vector
<
ck_tile
::
HostTensor
<
ADataType
>>
a_m_k_tensors
;
std
::
vector
<
ck_tile
::
HostTensor
<
BDataType
>>
b_k_n_tensors
;
std
::
vector
<
ck_tile
::
HostTensor
<
CDataType
>>
c_m_n_tensors
;
a_m_k_tensors
.
reserve
(
group_count
);
b_k_n_tensors
.
reserve
(
group_count
);
c_m_n_tensors
.
reserve
(
group_count
);
std
::
vector
<
std
::
unique_ptr
<
ck_tile
::
DeviceMem
>>
a_m_k_dev_buf
;
std
::
vector
<
std
::
unique_ptr
<
ck_tile
::
DeviceMem
>>
b_k_n_dev_buf
;
std
::
vector
<
std
::
unique_ptr
<
ck_tile
::
DeviceMem
>>
c_m_n_dev_buf
;
a_m_k_dev_buf
.
reserve
(
group_count
);
b_k_n_dev_buf
.
reserve
(
group_count
);
c_m_n_dev_buf
.
reserve
(
group_count
);
std
::
vector
<
grouped_gemm_kargs
>
gemm_descs
;
gemm_descs
.
reserve
(
group_count
);
for
(
int
i
=
0
;
i
<
group_count
;
++
i
)
{
const
ck_tile
::
index_t
M
=
Ms
[
i
];
const
ck_tile
::
index_t
N
=
Ns
[
i
];
const
ck_tile
::
index_t
K
=
Ks
[
i
];
stride_As
[
i
]
=
f_get_default_stride
(
M
,
N
,
stride_As
[
i
],
ALayout
{});
stride_Bs
[
i
]
=
f_get_default_stride
(
K
,
N
,
stride_Bs
[
i
],
BLayout
{});
stride_Cs
[
i
]
=
f_get_default_stride
(
M
,
N
,
stride_Cs
[
i
],
CLayout
{});
a_m_k_tensors
.
push_back
(
ck_tile
::
HostTensor
<
ADataType
>
(
f_host_tensor_descriptor
(
M
,
K
,
stride_As
[
i
],
ALayout
{})));
b_k_n_tensors
.
push_back
(
ck_tile
::
HostTensor
<
BDataType
>
(
f_host_tensor_descriptor
(
K
,
N
,
stride_Bs
[
i
],
BLayout
{})));
c_m_n_tensors
.
push_back
(
ck_tile
::
HostTensor
<
CDataType
>
(
f_host_tensor_descriptor
(
M
,
N
,
stride_Cs
[
i
],
CLayout
{})));
std
::
cout
<<
"gemm["
<<
i
<<
"]"
<<
" a_m_k: "
<<
a_m_k_tensors
[
i
].
mDesc
<<
" b_k_n: "
<<
b_k_n_tensors
[
i
].
mDesc
<<
" c_m_n: "
<<
c_m_n_tensors
[
i
].
mDesc
<<
std
::
endl
;
ck_tile
::
FillUniformDistribution
<
ADataType
>
{
-
5.
f
,
5.
f
}(
a_m_k_tensors
[
i
]);
ck_tile
::
FillUniformDistribution
<
BDataType
>
{
-
5.
f
,
5.
f
}(
b_k_n_tensors
[
i
]);
a_m_k_dev_buf
.
push_back
(
std
::
make_unique
<
ck_tile
::
DeviceMem
>
(
a_m_k_tensors
[
i
].
get_element_space_size_in_bytes
()));
b_k_n_dev_buf
.
push_back
(
std
::
make_unique
<
ck_tile
::
DeviceMem
>
(
b_k_n_tensors
[
i
].
get_element_space_size_in_bytes
()));
c_m_n_dev_buf
.
push_back
(
std
::
make_unique
<
ck_tile
::
DeviceMem
>
(
c_m_n_tensors
[
i
].
get_element_space_size_in_bytes
()));
a_m_k_dev_buf
[
i
]
->
ToDevice
(
a_m_k_tensors
[
i
].
data
());
b_k_n_dev_buf
[
i
]
->
ToDevice
(
b_k_n_tensors
[
i
].
data
());
c_m_n_dev_buf
[
i
]
->
SetZero
();
c_m_n_tensors
[
i
].
SetZero
();
const
void
*
p_a
=
a_m_k_dev_buf
[
i
]
->
GetDeviceBuffer
();
const
void
*
p_b
=
b_k_n_dev_buf
[
i
]
->
GetDeviceBuffer
();
void
*
p_c
=
c_m_n_dev_buf
[
i
]
->
GetDeviceBuffer
();
gemm_descs
.
push_back
(
{
p_a
,
p_b
,
p_c
,
M
,
N
,
K
,
stride_As
[
i
],
stride_Bs
[
i
],
stride_Cs
[
i
]});
}
ck_tile
::
DeviceMem
gemm_workspace
;
gemm_workspace
.
Realloc
(
GetWorkspaceSize
(
gemm_descs
));
invoke_grouped_gemm
<
ALayout
,
BLayout
,
CLayout
>
(
gemm_descs
,
ck_tile
::
stream_config
{
nullptr
,
false
},
gemm_workspace
.
GetDeviceBuffer
());
for
(
int
i
=
0
;
i
<
group_count
;
i
++
)
{
c_m_n_dev_buf
[
i
]
->
FromDevice
(
c_m_n_tensors
[
i
].
data
());
}
bool
pass
{
true
};
for
(
int
i
=
0
;
i
<
group_count
;
++
i
)
{
ck_tile
::
HostTensor
<
CDataType
>
c_m_n_host_ref
(
f_host_tensor_descriptor
(
Ms
[
i
],
Ns
[
i
],
stride_Cs
[
i
],
CLayout
{}));
c_m_n_host_ref
.
SetZero
();
ck_tile
::
reference_gemm
<
ADataType
,
BDataType
,
AccDataType
,
CDataType
>
(
a_m_k_tensors
[
i
],
b_k_n_tensors
[
i
],
c_m_n_host_ref
);
pass
&=
ck_tile
::
check_err
(
c_m_n_tensors
[
i
],
c_m_n_host_ref
);
}
EXPECT_TRUE
(
pass
);
}
};
test/data_type/test_custom_type.cpp
View file @
a5137505
...
...
@@ -51,8 +51,11 @@ TEST(Custom_bool, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{})
=
custom_bool_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_bool_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_bool_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bool_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -129,8 +132,11 @@ TEST(Custom_int8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{})
=
custom_int8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_int8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_int8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_int8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -207,8 +213,11 @@ TEST(Custom_uint8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{})
=
custom_uint8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_uint8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_uint8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_uint8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -287,8 +296,11 @@ TEST(Custom_f8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{})
=
custom_f8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_f8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_f8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_f8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -369,8 +381,11 @@ TEST(Custom_bf8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{})
=
custom_bf8_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_bf8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_bf8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bf8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -450,8 +465,11 @@ TEST(Custom_half, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{})
=
custom_half_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_half_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_half_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_half_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -533,8 +551,11 @@ TEST(Custom_bhalf, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{})
=
custom_bhalf_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_bhalf_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_bhalf_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bhalf_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -615,8 +636,11 @@ TEST(Custom_float, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{})
=
custom_float_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_float_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_float_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_float_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -693,8 +717,11 @@ TEST(Custom_double, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{})
=
custom_double_t
{
test_vec
.
at
(
i
)};
});
// copy the vector
vector_type
<
custom_double_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
custom_double_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_double_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
...
@@ -813,8 +840,11 @@ TEST(Complex_half, TestAsType)
right_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{})
=
complex_half_t
{
test_vec
.
at
(
num_elem
*
i
),
test_vec
.
at
(
num_elem
*
i
+
1
)};
});
// copy the vector
vector_type
<
complex_half_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
complex_half_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
complex_half_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{}).
real
,
...
...
@@ -907,8 +937,11 @@ TEST(FP8OCP, TestAsType)
right_vec
.
template
AsType
<
f8_t
>()(
Number
<
i
>
{})
=
ck
::
type_convert
<
f8_t
>
(
test_vec
.
at
(
i
));
});
// copy the vector
vector_type
<
f8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
f8_t
,
size
>
left_vec
;
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
f8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
...
...
@@ -984,8 +1017,11 @@ TEST(BF8OCP, TestAsType)
right_vec
.
template
AsType
<
bf8_t
>()(
Number
<
i
>
{})
=
ck
::
type_convert
<
bf8_t
>
(
test_vec
.
at
(
i
));
});
// copy the vector
vector_type
<
bf8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
bf8_t
,
size
>
{};
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
...
...
test/grouped_convnd_bwd_data/CMakeLists.txt
View file @
a5137505
add_gtest_executable
(
test_grouped_convnd_bwd_data test_grouped_convnd_bwd_data_xdl
_wmma
.cpp
)
add_gtest_executable
(
test_grouped_convnd_bwd_data
_xdl
test_grouped_convnd_bwd_data_xdl.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_convnd_bwd_data PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance
)
target_link_libraries
(
test_grouped_convnd_bwd_data_xdl PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance
)
endif
()
add_gtest_executable
(
test_grouped_convnd_bwd_data_wmma test_grouped_convnd_bwd_data_wmma.cpp
)
if
(
result EQUAL 0
)
target_link_libraries
(
test_grouped_convnd_bwd_data_wmma PRIVATE utility device_grouped_conv2d_bwd_data_instance device_grouped_conv3d_bwd_data_instance
)
endif
()
add_gtest_executable
(
test_grouped_convnd_bwd_data_interface_xdl test_grouped_convnd_bwd_data_interface_xdl.cpp
)
if
(
result EQUAL 0
)
...
...
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_wmma.cpp
0 → 100644
View file @
a5137505
// SPDX-License-Identifier: MIT
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
template
<
typename
Tuple
>
class
TestGroupedConvndBwdDataWmma
:
public
::
testing
::
Test
{
protected:
using
DataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
using
OutLayout
=
std
::
tuple_element_t
<
1
,
Tuple
>
;
using
WeiLayout
=
std
::
tuple_element_t
<
2
,
Tuple
>
;
using
InLayout
=
std
::
tuple_element_t
<
3
,
Tuple
>
;
std
::
vector
<
ck
::
utils
::
conv
::
ConvParam
>
conv_params
;
template
<
ck
::
index_t
NDimSpatial
>
void
Run
()
{
EXPECT_FALSE
(
conv_params
.
empty
());
bool
pass
=
true
;
for
(
auto
&
param
:
conv_params
)
{
pass
=
pass
&&
ck
::
profiler
::
profile_grouped_conv_bwd_data_impl
<
NDimSpatial
,
OutLayout
,
WeiLayout
,
InLayout
,
DataType
,
DataType
,
DataType
>
(
true
,
// do_verification
1
,
// init_method: integer value
false
,
// do_log
false
,
// time_kernel
param
);
}
EXPECT_TRUE
(
pass
);
}
};
using
namespace
ck
::
tensor_layout
::
convolution
;
using
KernelTypes2d
=
::
testing
::
Types
<
std
::
tuple
<
ck
::
half_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
int8_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
int8_t
,
NHWGK
,
GKYXC
,
NHWGC
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
ck
::
half_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
int8_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
ck
::
half_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
int8_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>>
;
template
<
typename
Tuple
>
class
TestGroupedConvndBwdDataWmma2d
:
public
TestGroupedConvndBwdDataWmma
<
Tuple
>
{
};
template
<
typename
Tuple
>
class
TestGroupedConvndBwdDataWmma3d
:
public
TestGroupedConvndBwdDataWmma
<
Tuple
>
{
};
TYPED_TEST_SUITE
(
TestGroupedConvndBwdDataWmma2d
,
KernelTypes2d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdDataWmma3d
,
KernelTypes3d
);
TYPED_TEST
(
TestGroupedConvndBwdDataWmma2d
,
Test2D
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
2
,
2
,
4
,
192
,
192
,
{
3
,
3
},
{
28
,
28
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
(
{
2
,
2
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
(
{
2
,
2
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
conv_params
.
push_back
(
{
2
,
2
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
32
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
64
,
3
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
1
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
template
Run
<
2
>();
}
TYPED_TEST
(
TestGroupedConvndBwdDataWmma3d
,
Test3D
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
3
,
2
,
16
,
128
,
256
,
{
1
,
1
,
1
},
{
7
,
7
,
7
},
{
2
,
2
,
2
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
this
->
conv_params
.
push_back
(
{
3
,
2
,
2
,
128
,
256
,
{
3
,
3
,
3
},
{
14
,
14
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
conv_params
.
push_back
(
{
3
,
2
,
32
,
128
,
256
,
{
1
,
1
,
1
},
{
3
,
3
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
this
->
conv_params
.
push_back
(
{
3
,
1
,
1
,
1
,
32
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
conv_params
.
push_back
(
{
3
,
1
,
1
,
64
,
3
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
conv_params
.
push_back
(
{
3
,
1
,
1
,
1
,
1
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
template
Run
<
3
>();
}
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl
_wmma
.cpp
→
test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_xdl.cpp
View file @
a5137505
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
4
, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
...
...
@@ -12,7 +12,7 @@
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
template
<
typename
Tuple
>
class
TestGroupedConvndBwdData
:
public
::
testing
::
Test
class
TestGroupedConvndBwdData
Xdl
:
public
::
testing
::
Test
{
protected:
using
DataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
...
...
@@ -51,35 +51,31 @@ using namespace ck::tensor_layout::convolution;
using
KernelTypes2d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
ck
::
half_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
int8_t
,
GNHWK
,
GKYXC
,
GNHWC
>
,
std
::
tuple
<
float
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
ck
::
half_t
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NHWGK
,
GKYXC
,
NHWGC
>
,
std
::
tuple
<
int8_t
,
NHWGK
,
GKYXC
,
NHWGC
>>
;
std
::
tuple
<
ck
::
bhalf_t
,
NHWGK
,
GKYXC
,
NHWGC
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
ck
::
half_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
int8_t
,
GNDHWK
,
GKZYXC
,
GNDHWC
>
,
std
::
tuple
<
float
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
ck
::
half_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>
,
std
::
tuple
<
int8_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>>
;
std
::
tuple
<
ck
::
bhalf_t
,
NDHWGK
,
GKZYXC
,
NDHWGC
>>
;
template
<
typename
Tuple
>
class
TestGroupedConvndBwdData2d
:
public
TestGroupedConvndBwdData
<
Tuple
>
class
TestGroupedConvndBwdData
Xdl
2d
:
public
TestGroupedConvndBwdData
Xdl
<
Tuple
>
{
};
template
<
typename
Tuple
>
class
TestGroupedConvndBwdData3d
:
public
TestGroupedConvndBwdData
<
Tuple
>
class
TestGroupedConvndBwdData
Xdl
3d
:
public
TestGroupedConvndBwdData
Xdl
<
Tuple
>
{
};
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData2d
,
KernelTypes2d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData3d
,
KernelTypes3d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData
Xdl
2d
,
KernelTypes2d
);
TYPED_TEST_SUITE
(
TestGroupedConvndBwdData
Xdl
3d
,
KernelTypes3d
);
TYPED_TEST
(
TestGroupedConvndBwdData2d
,
Test2D
)
TYPED_TEST
(
TestGroupedConvndBwdData
Xdl
2d
,
Test2D
)
{
this
->
conv_params
.
clear
();
...
...
@@ -94,10 +90,13 @@ TYPED_TEST(TestGroupedConvndBwdData2d, Test2D)
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
32
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
64
,
3
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
({
2
,
1
,
1
,
1
,
1
,
{
8
,
8
},
{
32
,
32
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
// SplitN case
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
4
,
192
,
{
2
,
2
},
{
224
,
224
},
{
224
,
224
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
template
Run
<
2
>();
}
TYPED_TEST
(
TestGroupedConvndBwdData3d
,
Test3D
)
TYPED_TEST
(
TestGroupedConvndBwdData
Xdl
3d
,
Test3D
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
...
...
@@ -112,5 +111,17 @@ TYPED_TEST(TestGroupedConvndBwdData3d, Test3D)
{
3
,
1
,
1
,
64
,
3
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
this
->
conv_params
.
push_back
(
{
3
,
1
,
1
,
1
,
1
,
{
3
,
3
,
3
},
{
32
,
32
,
32
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
// SplitN case
this
->
conv_params
.
push_back
({
3
,
1
,
128
,
4
,
192
,
{
2
,
2
,
2
},
{
2
,
224
,
224
},
{
1
,
224
,
224
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
this
->
template
Run
<
3
>();
}
test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp
View file @
a5137505
...
...
@@ -64,6 +64,7 @@ using KernelTypes2d = ::testing::Types<std::tuple<float, GNHWC, GKYXC, GNHWK>,
std
::
tuple
<
int8_t
,
NHWGC
,
GKYXC
,
NHWGK
>
,
std
::
tuple
<
float
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
ck
::
half_t
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
ck
::
bhalf_t
,
NGCHW
,
GKYXC
,
NGKHW
>
,
std
::
tuple
<
int8_t
,
NGCHW
,
GKYXC
,
NGKHW
>>
;
using
KernelTypes3d
=
::
testing
::
Types
<
std
::
tuple
<
float
,
GNDHWC
,
GKZYXC
,
GNDHWK
>
,
...
...
Prev
1
…
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment