Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
13587ab3
Unverified
Commit
13587ab3
authored
Oct 23, 2022
by
arai713
Committed by
GitHub
Oct 23, 2022
Browse files
Merge branch 'develop' into gridwise_2d
parents
7e44fd84
685860c2
Changes
338
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
302 additions
and
734 deletions
+302
-734
profiler/include/profile_groupnorm_impl.hpp
profiler/include/profile_groupnorm_impl.hpp
+9
-9
profiler/include/profile_layernorm_impl.hpp
profiler/include/profile_layernorm_impl.hpp
+22
-22
profiler/include/profile_softmax_impl.hpp
profiler/include/profile_softmax_impl.hpp
+10
-10
profiler/src/profile_layernorm.cpp
profiler/src/profile_layernorm.cpp
+6
-25
profiler/src/profile_softmax.cpp
profiler/src/profile_softmax.cpp
+41
-43
test/CMakeLists.txt
test/CMakeLists.txt
+3
-4
test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp
test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp
+1
-1
test/batched_gemm_masking_scale_softmax_gemm_permute/test_batched_gemm_masking_scale_softmax_gemm_permute_util.hpp
..._batched_gemm_masking_scale_softmax_gemm_permute_util.hpp
+1
-1
test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp
...gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp
+1
-1
test/convnd_bwd_data/convnd_bwd_data.cpp
test/convnd_bwd_data/convnd_bwd_data.cpp
+62
-210
test/convnd_bwd_weight/convnd_bwd_weight.cpp
test/convnd_bwd_weight/convnd_bwd_weight.cpp
+61
-176
test/convnd_fwd/convnd_fwd.cpp
test/convnd_fwd/convnd_fwd.cpp
+62
-211
test/normalization/CMakeLists.txt
test/normalization/CMakeLists.txt
+0
-0
test/normalization/test_groupnorm_fp16.cpp
test/normalization/test_groupnorm_fp16.cpp
+2
-0
test/normalization/test_groupnorm_fp32.cpp
test/normalization/test_groupnorm_fp32.cpp
+0
-0
test/normalization/test_layernorm2d_fp16.cpp
test/normalization/test_layernorm2d_fp16.cpp
+0
-0
test/normalization/test_layernorm2d_fp32.cpp
test/normalization/test_layernorm2d_fp32.cpp
+0
-0
test/normalization/test_layernorm2d_util.hpp
test/normalization/test_layernorm2d_util.hpp
+21
-21
No files found.
profiler/include/profile_groupnorm_impl.hpp
View file @
13587ab3
...
...
@@ -7,7 +7,7 @@
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/
layernorm
.hpp"
#include "ck/library/tensor_operation_instance/gpu/
normalization
.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
...
...
@@ -75,14 +75,14 @@ bool profile_groupnorm_impl(int do_verification,
beta_dev
.
ToDevice
(
beta
.
mData
.
data
());
// add device normalization instances
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
Device
Layernorm
<
XDataType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
YDataType
,
PassThrough
,
5
,
3
>
;
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
Device
Normalization
<
XDataType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
YDataType
,
PassThrough
,
5
,
3
>
;
// get device op instances
const
auto
instance_ptrs
=
...
...
profiler/include/profile_layernorm_impl.hpp
View file @
13587ab3
...
...
@@ -6,9 +6,7 @@
#include <iomanip>
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/gpu/layernorm.hpp"
#include "ck/library/tensor_operation_instance/gpu/normalization.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
@@ -28,27 +26,29 @@ void profile_layernorm_impl(int do_verification,
int
init_method
,
bool
do_log
,
bool
time_kernel
,
std
::
vector
<
index_t
>
length
,
std
::
vector
<
index_t
>
strideXY
,
std
::
vector
<
index_t
>
strideGamma
,
std
::
vector
<
index_t
>
strideBeta
)
std
::
vector
<
index_t
>
length
)
{
using
PassThrough
=
ck
::
tensor_operation
::
element_wise
::
PassThrough
;
if
(
length
.
size
()
<
2
)
return
;
// Assume normalize dimension except for first dimension
// Assume normalize dimension except for
batch (
first
)
dimension
std
::
vector
<
index_t
>
reduce_length
{
length
.
begin
()
+
1
,
length
.
end
()};
std
::
vector
<
index_t
>
reduce_dim
;
for
(
int
i
=
1
;
i
<
Rank
;
++
i
)
reduce_dim
.
push_back
(
i
);
Tensor
<
XDataType
>
x
(
length
);
Tensor
<
GammaDataType
>
gamma
(
reduce_length
,
strideGamma
);
Tensor
<
BetaDataType
>
beta
(
reduce_length
,
strideBeta
);
Tensor
<
YDataType
>
y
(
length
,
strideXY
);
Tensor
<
YDataType
>
host_y
(
length
,
strideXY
);
Tensor
<
GammaDataType
>
gamma
(
reduce_length
);
Tensor
<
BetaDataType
>
beta
(
reduce_length
);
Tensor
<
YDataType
>
y
(
length
);
Tensor
<
YDataType
>
host_y
(
length
);
std
::
vector
<
index_t
>
strideXY
=
std
::
vector
<
ck
::
index_t
>
{
x
.
mDesc
.
GetStrides
().
begin
(),
x
.
mDesc
.
GetStrides
().
end
()};
std
::
vector
<
index_t
>
strideGammaBeta
=
strideXY
;
strideGammaBeta
[
0
]
=
0
;
switch
(
init_method
)
{
...
...
@@ -84,14 +84,14 @@ void profile_layernorm_impl(int do_verification,
constexpr
int
NumReduceDim
=
Rank
-
1
;
// add device normalization instances
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
Device
Layernorm
<
XDataType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
YDataType
,
PassThrough
,
Rank
,
NumReduceDim
>
;
using
DeviceOp
=
ck
::
tensor_operation
::
device
::
Device
Normalization
<
XDataType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
YDataType
,
PassThrough
,
Rank
,
NumReduceDim
>
;
// get device op instances
const
auto
instance_ptrs
=
...
...
@@ -126,8 +126,8 @@ void profile_layernorm_impl(int do_verification,
{
auto
argument_ptr
=
inst_ptr
->
MakeArgumentPointer
(
length
,
strideXY
,
strideGamma
,
strideBeta
,
strideGamma
Beta
,
stride
Gamma
Beta
,
strideXY
,
reduce_dim
,
1e-4
,
...
...
profiler/include/profile_
normalization
_impl.hpp
→
profiler/include/profile_
softmax
_impl.hpp
View file @
13587ab3
...
...
@@ -69,16 +69,16 @@ template <> std::string type_to_string<int32_t>() { return "int32"; }
// clang-format on
template
<
typename
InDataType
,
typename
AccDataType
,
typename
OutDataType
,
index_t
Rank
>
void
profile_
normalization
_impl
(
int
do_verification
,
int
init_method
,
bool
do_log
,
bool
time_kernel
,
std
::
vector
<
index_t
>
in_length
,
std
::
vector
<
index_t
>
in_strides
,
std
::
vector
<
index_t
>
reduce_dims
,
AccDataType
alpha
,
AccDataType
beta
,
NormType
norm_type
)
void
profile_
softmax
_impl
(
int
do_verification
,
int
init_method
,
bool
do_log
,
bool
time_kernel
,
std
::
vector
<
index_t
>
in_length
,
std
::
vector
<
index_t
>
in_strides
,
std
::
vector
<
index_t
>
reduce_dims
,
AccDataType
alpha
,
AccDataType
beta
,
NormType
norm_type
)
{
if
(
Rank
!=
in_length
.
size
())
{
...
...
profiler/src/profile_layernorm.cpp
View file @
13587ab3
...
...
@@ -12,8 +12,7 @@ using ck::index_t;
struct
LayernormArgParser
{
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int
>>
long_opts
=
{
{
"length"
,
{}},
{
"strideXY"
,
{}},
{
"strideGamma"
,
{}},
{
"strideBeta"
,
{}}};
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
int
>>
long_opts
=
{{
"length"
,
{}}};
bool
parse_opt
(
int
argc
,
char
*
argv
[],
const
std
::
string
&
key
,
int
i
)
{
...
...
@@ -52,9 +51,6 @@ void print_help_layernorm()
<<
"arg4: print tensor value (0: no; 1: yes)
\n
"
<<
"arg5: time kernel (0=no, 1=yes)
\n
"
<<
"--length: tensor extents (e.g, --length 1024 1024)
\n
"
<<
"--strideXY: tensor strides (e.g, --strideXY 1024 1)
\n
"
<<
"--strideGamma: tensor strides (e.g, --strideGamma 1)
\n
"
<<
"--strideBeta: tensor strides (e.g, --strideBeta 1)
\n
"
<<
std
::
endl
;
}
...
...
@@ -77,10 +73,7 @@ int profile_layernorm(int argc, char* argv[])
// parse the long options
arg_parser
(
argc
,
argv
);
const
std
::
vector
<
index_t
>
length
=
arg_parser
.
long_opts
[
"length"
];
const
std
::
vector
<
index_t
>
strideXY
=
arg_parser
.
long_opts
[
"strideXY"
];
const
std
::
vector
<
index_t
>
strideGamma
=
arg_parser
.
long_opts
[
"strideGamma"
];
const
std
::
vector
<
index_t
>
strideBeta
=
arg_parser
.
long_opts
[
"strideBeta"
];
const
std
::
vector
<
index_t
>
length
=
arg_parser
.
long_opts
[
"length"
];
using
F16
=
ck
::
half_t
;
using
F32
=
float
;
...
...
@@ -88,25 +81,13 @@ int profile_layernorm(int argc, char* argv[])
if
(
data_type
==
ck
::
DataTypeEnum
::
Half
)
{
ck
::
profiler
::
profile_layernorm_impl
<
F16
,
F16
,
F16
,
F32
,
F16
,
rank
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
strideXY
,
strideGamma
,
strideBeta
);
ck
::
profiler
::
profile_layernorm_impl
<
F16
,
F16
,
F16
,
F32
,
F16
,
rank
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
);
}
else
if
(
data_type
==
ck
::
DataTypeEnum
::
Float
)
{
ck
::
profiler
::
profile_layernorm_impl
<
F32
,
F32
,
F32
,
F32
,
F32
,
rank
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
strideXY
,
strideGamma
,
strideBeta
);
ck
::
profiler
::
profile_layernorm_impl
<
F32
,
F32
,
F32
,
F32
,
F32
,
rank
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
);
}
else
{
...
...
profiler/src/profile_
normalization
.cpp
→
profiler/src/profile_
softmax
.cpp
View file @
13587ab3
...
...
@@ -5,7 +5,7 @@
#include <vector>
#include <unordered_map>
#include "profiler/include/profile_
normalization
_impl.hpp"
#include "profiler/include/profile_
softmax
_impl.hpp"
using
ck
::
index_t
;
using
ck
::
profiler
::
NormDataType
;
...
...
@@ -95,30 +95,29 @@ int profile_normalization(int argc, char* argv[])
{
if
(
data_type
==
NormDataType
::
F16_F16
)
{
ck
::
profiler
::
profile_normalization_impl
<
ck
::
half_t
,
float
,
ck
::
half_t
,
3
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
ck
::
profiler
::
profile_softmax_impl
<
ck
::
half_t
,
float
,
ck
::
half_t
,
3
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
}
else
if
(
data_type
==
NormDataType
::
F32_F32
)
{
ck
::
profiler
::
profile_
normalization
_impl
<
float
,
float
,
float
,
3
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
ck
::
profiler
::
profile_
softmax
_impl
<
float
,
float
,
float
,
3
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
}
else
{
...
...
@@ -129,30 +128,29 @@ int profile_normalization(int argc, char* argv[])
{
if
(
data_type
==
NormDataType
::
F16_F16
)
{
ck
::
profiler
::
profile_normalization_impl
<
ck
::
half_t
,
float
,
ck
::
half_t
,
4
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
ck
::
profiler
::
profile_softmax_impl
<
ck
::
half_t
,
float
,
ck
::
half_t
,
4
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
}
else
if
(
data_type
==
NormDataType
::
F32_F32
)
{
ck
::
profiler
::
profile_
normalization
_impl
<
float
,
float
,
float
,
4
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
ck
::
profiler
::
profile_
softmax
_impl
<
float
,
float
,
float
,
4
>
(
do_verification
,
init_method
,
do_log
,
time_kernel
,
length
,
stride
,
reduce
,
float
(
alpha
),
float
(
beta
),
norm_type
);
}
else
{
...
...
test/CMakeLists.txt
View file @
13587ab3
...
...
@@ -6,11 +6,10 @@ include(googletest)
add_custom_target
(
tests
)
function
(
add_test_executable TEST_NAME
)
message
(
"adding test
${
TEST_NAME
}
"
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
rocm_install
(
TARGETS
${
TEST_NAME
}
COMPONENT tests
)
...
...
@@ -23,6 +22,7 @@ function(add_gtest_executable TEST_NAME)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
# suppress gtest warnings
target_compile_options
(
${
TEST_NAME
}
PRIVATE -Wno-global-constructors -Wno-undef
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE gtest_main
)
...
...
@@ -30,7 +30,6 @@ function(add_gtest_executable TEST_NAME)
rocm_install
(
TARGETS
${
TEST_NAME
}
COMPONENT tests
)
endfunction
(
add_gtest_executable TEST_NAME
)
add_subdirectory
(
magic_number_division
)
add_subdirectory
(
space_filling_curve
)
add_subdirectory
(
conv_util
)
...
...
@@ -51,5 +50,5 @@ add_subdirectory(convnd_bwd_data)
add_subdirectory
(
grouped_convnd_fwd
)
add_subdirectory
(
block_to_ctile_map
)
add_subdirectory
(
softmax
)
add_subdirectory
(
layernorm
)
add_subdirectory
(
normalization
)
add_subdirectory
(
data_type
)
test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp
View file @
13587ab3
...
...
@@ -5,7 +5,7 @@
#include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_gemm_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_gemm_impl.hpp"
using
ck
::
tensor_operation
::
device
::
GemmSpecialization
;
...
...
test/batched_gemm_masking_scale_softmax_gemm_permute/test_batched_gemm_masking_scale_softmax_gemm_permute_util.hpp
View file @
13587ab3
...
...
@@ -5,7 +5,7 @@
#include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_masking_scale_softmax_gemm_permute_impl.hpp"
using
ck
::
tensor_operation
::
device
::
GemmSpecialization
;
...
...
test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp
View file @
13587ab3
...
...
@@ -5,7 +5,7 @@
#include <vector>
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp"
#include "profiler/include/profile_batched_gemm_softmax_gemm_impl.hpp"
using
ck
::
tensor_operation
::
device
::
GemmSpecialization
;
...
...
test/convnd_bwd_data/convnd_bwd_data.cpp
View file @
13587ab3
...
...
@@ -5,237 +5,89 @@
#include <iostream>
#include <initializer_list>
#include <vector>
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_conv_bwd_data_impl.hpp"
template
<
typename
Tuple
>
class
TestConvndBwdData
:
public
::
testing
::
Test
{
protected:
using
DataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
std
::
vector
<
ck
::
utils
::
conv
::
ConvParam
>
conv_params
;
};
// 1d
TEST_F
(
TestConvndBwdData
,
Conv1dBwdData
)
{
conv_params
.
clear
();
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
for
(
auto
&
param
:
conv_params
)
template
<
ck
::
index_t
NDimSpatial
>
void
Run
()
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
EXPECT_FALSE
(
conv_params
.
empty
());
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
NDimSpatial
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
DataType
,
DataType
,
DataType
>
(
true
,
// do_verification
1
,
// init_method integer value
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
}
}
};
// int8
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
int8_t
,
int8_t
,
int8_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
float
>
,
std
::
tuple
<
ck
::
half_t
>
,
std
::
tuple
<
ck
::
bhalf_t
>
,
std
::
tuple
<
std
::
int8_t
>>
;
TYPED_TEST_SUITE
(
TestConvndBwdData
,
KernelTypes
);
EXPECT_TRUE
(
pass
);
}
// 1d
TYPED_TEST
(
TestConvndBwdData
,
Conv1dBwdData
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
this
->
template
Run
<
1
>();
}
// 2d
TEST
_F
(
TestConvndBwdData
,
Conv2dBwdData
)
TYPED_
TEST
(
TestConvndBwdData
,
Conv2dBwdData
)
{
conv_params
.
clear
();
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// int8
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
int8_t
,
int8_t
,
int8_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
}
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
template
Run
<
2
>();
}
// 3d
TEST
_F
(
TestConvndBwdData
,
Conv3dBwdData
)
TYPED_
TEST
(
TestConvndBwdData
,
Conv3dBwdData
)
{
conv_params
.
clear
();
conv_params
.
push_back
(
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
7
,
7
,
7
},
{
2
,
2
,
2
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
conv_params
.
push_back
(
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
3
,
3
,
3
},
{
14
,
14
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
conv_params
.
push_back
(
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
3
,
3
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// int8
pass
=
ck
::
profiler
::
profile_conv_bwd_data_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
int8_t
,
int8_t
,
int8_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
}
this
->
template
Run
<
3
>();
}
test/convnd_bwd_weight/convnd_bwd_weight.cpp
View file @
13587ab3
...
...
@@ -5,201 +5,86 @@
#include <iostream>
#include <initializer_list>
#include <vector>
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
template
<
typename
Tuple
>
class
TestConvndBwdWeight
:
public
::
testing
::
Test
{
protected:
using
DataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
std
::
vector
<
ck
::
utils
::
conv
::
ConvParam
>
conv_params
;
};
// 1d
TEST_F
(
TestConvndBwdWeight
,
Conv1dBwdWeight
)
{
conv_params
.
clear
();
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
ck
::
index_t
split_k
{
2
};
for
(
auto
&
param
:
conv_params
)
template
<
ck
::
index_t
NDimSpatial
>
void
Run
()
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
EXPECT_FALSE
(
conv_params
.
empty
());
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
NDimSpatial
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
DataType
,
DataType
,
DataType
>
(
true
,
// do_verification
1
,
// init_method integer value
false
,
// do_log
false
,
// time_kernel
param
,
split_k
);
EXPECT_TRUE
(
pass
);
}
}
};
// bf16
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
float
>
,
std
::
tuple
<
ck
::
half_t
>
,
std
::
tuple
<
ck
::
bhalf_t
>>
;
TYPED_TEST_SUITE
(
TestConvndBwdWeight
,
KernelTypes
);
EXPECT_TRUE
(
pass
);
}
TYPED_TEST
(
TestConvndBwdWeight
,
Test1D
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
this
->
template
Run
<
1
>();
}
// 2d
TEST_F
(
TestConvndBwdWeight
,
Conv2dBwdWeight
)
TYPED_TEST
(
TestConvndBwdWeight
,
Test2D
)
{
conv_params
.
clear
();
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
1
,
32
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
}
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
conv_params
.
push_back
(
{
2
,
1
,
32
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
template
Run
<
2
>();
}
// 3d
TEST_F
(
TestConvndBwdWeight
,
Conv3dBwdWeight
)
TYPED_TEST
(
TestConvndBwdWeight
,
Test3D
)
{
conv_params
.
clear
();
conv_params
.
push_back
(
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
7
,
7
,
7
},
{
2
,
2
,
2
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
conv_params
.
push_back
(
this
->
conv_params
.
push_back
(
{
3
,
1
,
32
,
128
,
256
,
{
3
,
3
,
3
},
{
14
,
14
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
conv_params
.
push_back
(
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
3
,
3
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_bwd_weight_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
,
2
);
EXPECT_TRUE
(
pass
);
}
this
->
template
Run
<
3
>();
}
test/convnd_fwd/convnd_fwd.cpp
View file @
13587ab3
...
...
@@ -5,237 +5,88 @@
#include <iostream>
#include <initializer_list>
#include <vector>
#include <tuple>
#include <gtest/gtest.h>
#include "profiler/include/profile_conv_fwd_impl.hpp"
template
<
typename
Tuple
>
class
TestConvndFwd
:
public
::
testing
::
Test
{
protected:
using
DataType
=
std
::
tuple_element_t
<
0
,
Tuple
>
;
std
::
vector
<
ck
::
utils
::
conv
::
ConvParam
>
conv_params
;
};
// 1d
TEST_F
(
TestConvndFwd
,
Conv1dFwd
)
{
conv_params
.
clear
();
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
for
(
auto
&
param
:
conv_params
)
template
<
ck
::
index_t
NDimSpatial
>
void
Run
()
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
EXPECT_FALSE
(
conv_params
.
empty
());
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
NDimSpatial
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
NDHWC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
>>
,
ck
::
tuple_element_t
<
NDimSpatial
-
1
,
ck
::
Tuple
<
ck
::
tensor_layout
::
convolution
::
NWK
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
tensor_layout
::
convolution
::
NDHWK
>>
,
DataType
,
DataType
,
DataType
>
(
true
,
// do_verification
1
,
// init_method integer value
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
}
}
};
// int8
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
1
,
ck
::
tensor_layout
::
convolution
::
NWC
,
ck
::
tensor_layout
::
convolution
::
KXC
,
ck
::
tensor_layout
::
convolution
::
NWK
,
int8_t
,
int8_t
,
int8_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
using
KernelTypes
=
::
testing
::
Types
<
std
::
tuple
<
float
>
,
std
::
tuple
<
ck
::
half_t
>
,
std
::
tuple
<
ck
::
bhalf_t
>
,
std
::
tuple
<
std
::
int8_t
>>
;
TYPED_TEST_SUITE
(
TestConvndFwd
,
KernelTypes
);
EXPECT_TRUE
(
pass
);
}
// 1d
TYPED_TEST
(
TestConvndFwd
,
Conv1dFwd
)
{
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
14
},
{
2
},
{
1
},
{
0
},
{
0
}});
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
3
},
{
28
},
{
1
},
{
1
},
{
1
},
{
1
}});
this
->
conv_params
.
push_back
({
1
,
1
,
128
,
128
,
256
,
{
1
},
{
3
},
{
1
},
{
1
},
{
0
},
{
0
}});
this
->
template
Run
<
1
>();
}
// 2d
TEST
_F
(
TestConvndFwd
,
Conv2dFwd
)
TYPED_
TEST
(
TestConvndFwd
,
Conv2dFwd
)
{
conv_params
.
clear
();
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
conv_params
.
push_back
({
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// int8
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
2
,
ck
::
tensor_layout
::
convolution
::
NHWC
,
ck
::
tensor_layout
::
convolution
::
KYXC
,
ck
::
tensor_layout
::
convolution
::
NHWK
,
int8_t
,
int8_t
,
int8_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
}
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
7
,
7
},
{
2
,
2
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
3
,
3
},
{
14
,
14
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
}});
this
->
conv_params
.
push_back
(
{
2
,
1
,
128
,
128
,
256
,
{
1
,
1
},
{
3
,
3
},
{
1
,
1
},
{
1
,
1
},
{
0
,
0
},
{
0
,
0
}});
this
->
template
Run
<
2
>();
}
// 3d
TEST
_F
(
TestConvndFwd
,
Conv3dFwd
)
TYPED_
TEST
(
TestConvndFwd
,
Conv3dFwd
)
{
conv_params
.
clear
();
conv_params
.
push_back
(
this
->
conv_params
.
clear
();
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
7
,
7
,
7
},
{
2
,
2
,
2
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
conv_params
.
push_back
(
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
3
,
3
,
3
},
{
14
,
14
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
1
,
1
,
1
}});
conv_params
.
push_back
(
this
->
conv_params
.
push_back
(
{
3
,
1
,
128
,
128
,
256
,
{
1
,
1
,
1
},
{
3
,
3
,
3
},
{
1
,
1
,
1
},
{
1
,
1
,
1
},
{
0
,
0
,
0
},
{
0
,
0
,
0
}});
for
(
auto
&
param
:
conv_params
)
{
bool
pass
;
// fp32
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
float
,
float
,
float
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// fp16
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
ck
::
half_t
,
ck
::
half_t
,
ck
::
half_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// bf16
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
ck
::
bhalf_t
,
ck
::
bhalf_t
,
ck
::
bhalf_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
// int8
pass
=
ck
::
profiler
::
profile_conv_fwd_impl
<
3
,
ck
::
tensor_layout
::
convolution
::
NDHWC
,
ck
::
tensor_layout
::
convolution
::
KZYXC
,
ck
::
tensor_layout
::
convolution
::
NDHWK
,
int8_t
,
int8_t
,
int8_t
>
(
true
,
// do_verification
1
,
// init_method
false
,
// do_log
false
,
// time_kernel
param
);
EXPECT_TRUE
(
pass
);
}
this
->
template
Run
<
3
>();
}
test/
layernorm
/CMakeLists.txt
→
test/
normalization
/CMakeLists.txt
View file @
13587ab3
File moved
test/
layernorm
/test_groupnorm_fp16.cpp
→
test/
normalization
/test_groupnorm_fp16.cpp
View file @
13587ab3
...
...
@@ -26,6 +26,8 @@ class TestGroupnorm : public ::testing::Test
{
256
,
9
,
9
,
9
,
9
},
{
1
,
64
,
64
,
32
,
10
},
{
1
,
32
,
32
,
32
,
20
},
{
2
,
32
,
32
,
32
,
30
},
{
2
,
32
,
32
,
32
,
40
},
{
1
,
16
,
16
,
32
,
40
}};
for
(
auto
length
:
lengths
)
...
...
test/
layernorm
/test_groupnorm_fp32.cpp
→
test/
normalization
/test_groupnorm_fp32.cpp
View file @
13587ab3
File moved
test/
layernorm
/test_layernorm2d_fp16.cpp
→
test/
normalization
/test_layernorm2d_fp16.cpp
View file @
13587ab3
File moved
test/
layernorm
/test_layernorm2d_fp32.cpp
→
test/
normalization
/test_layernorm2d_fp32.cpp
View file @
13587ab3
File moved
test/
layernorm
/test_layernorm2d_util.hpp
→
test/
normalization
/test_layernorm2d_util.hpp
View file @
13587ab3
...
...
@@ -9,7 +9,7 @@
#include "ck/ck.hpp"
#include "ck/utility/number.hpp"
#include "ck/tensor_operation/gpu/device/device_
layernorm
_impl.hpp"
#include "ck/tensor_operation/gpu/device/
impl/
device_
normalization
_impl.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/host_tensor.hpp"
...
...
@@ -65,26 +65,26 @@ class TestLayernorm2d : public ::testing::Test
Rank
,
NumReduceDim
>
;
using
DeviceInstance
=
tensor_operation
::
device
::
Device
Layernorm
Impl
<
XDataType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
YDataType
,
PassThrough
,
Rank
,
NumReduceDim
,
BlockSize
,
MThreadClusterSize
,
KThreadClusterSize
,
MThreadSliceSize
,
KThreadSliceSize
,
XYSrcVectorDim
,
XSrcVectorSize
,
GammaSrcVectorDim
,
GammaSrcVectorSize
,
BetaSrcVectorDim
,
BetaSrcVectorSize
,
YDstVectorSize
>
;
using
DeviceInstance
=
tensor_operation
::
device
::
Device
Normalization
Impl
<
XDataType
,
GammaDataType
,
BetaDataType
,
AccDataType
,
YDataType
,
PassThrough
,
Rank
,
NumReduceDim
,
BlockSize
,
MThreadClusterSize
,
KThreadClusterSize
,
MThreadSliceSize
,
KThreadSliceSize
,
XYSrcVectorDim
,
XSrcVectorSize
,
GammaSrcVectorDim
,
GammaSrcVectorSize
,
BetaSrcVectorDim
,
BetaSrcVectorSize
,
YDstVectorSize
>
;
TestLayernorm2d
()
:
ref_instance_invoker_
(
ReferenceInstance
{}.
MakeInvoker
())
{}
...
...
Prev
1
…
13
14
15
16
17
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment