Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4b448373
"profiler/include/profile_softmax_impl.hpp" did not exist on "eccf8773a6e7536aa42b3034014a480b779bd651"
Commit
4b448373
authored
Jul 12, 2022
by
carlushuang
Browse files
fix bug on merge latest develop
parent
b79df771
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
63 additions
and
81 deletions
+63
-81
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxc_nhwk_instance.cpp
...nv2d_bias_activation_add_avx2_nhwc_kyxc_nhwk_instance.cpp
+15
-14
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
...2d_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
+15
-14
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_yxck_nhwk_instance.cpp
...nv2d_bias_activation_add_avx2_nhwc_yxck_nhwk_instance.cpp
+15
-14
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+0
-23
test/CMakeLists.txt
test/CMakeLists.txt
+5
-5
test/cpu_ukernel/cpu_gemm_uk.cpp
test/cpu_ukernel/cpu_gemm_uk.cpp
+13
-11
No files found.
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxc_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <utility>
#include "convolution_forward_specialization_cpu.hpp"
#include "config.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "device_operation_instance.hpp"
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -60,7 +61,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -81,7 +82,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk(
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -102,7 +103,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_local_c(
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -141,7 +142,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_mt(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -162,7 +163,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -183,7 +184,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_local_c(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -222,7 +223,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_mt(
void
add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -243,7 +244,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk(
void
add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -264,7 +265,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_local_c(
void
add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_kyxck8_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <utility>
#include "config.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "device_operation_instance.hpp"
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -60,7 +61,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -81,7 +82,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk(
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -102,7 +103,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_local_c(
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -141,7 +142,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_mt(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -162,7 +163,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -183,7 +184,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_local_c(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -222,7 +223,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_mt(
void
add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -243,7 +244,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk(
void
add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -264,7 +265,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_local_c(
void
add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
library/src/tensor_operation_instance/cpu/conv2d_fwd_bias_activation_add/device_conv2d_bias_activation_add_avx2_nhwc_yxck_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <utility>
#include "config.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "device_operation_instance.hpp"
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
@@ -59,7 +60,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -80,7 +81,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk(
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -101,7 +102,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_local_c(
void
add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddReluAdd
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -140,7 +141,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_mt(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -161,7 +162,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -182,7 +183,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_local_c(
void
add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
AddRelu
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -221,7 +222,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_mt(
void
add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -242,7 +243,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk(
void
add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
@@ -263,7 +264,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c(
void
add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_mt
(
std
::
vector
<
DeviceConvFwdBiasActivationAddPtr
<
PT
,
PT
,
Add
>>&
instances
)
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
std
::
make_tuple
(
// clang-format off
...
...
profiler/src/profiler.cpp
View file @
4b448373
...
...
@@ -141,31 +141,8 @@ int main(int argc, char* argv[])
}
else
{
<<<<<<<
HEAD
// clang-format off
printf
(
"arg1: tensor operation (gemm: GEMM
\n
"
" gemm_bias_2d: GEMM+Bias(2D)
\n
"
" gemm_bias_relu: GEMM+Bias+ReLU
\n
"
" gemm_bias_relu_add: GEMM+Bias+ReLU+Add
\n
"
" gemm_reduce: GEMM+Reduce
\n
"
" grouped_gemm: Grouped GEMM
\n
"
" conv_fwd: ForwardConvolution
\n
"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU
\n
"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add
\n
"
" conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd
\n
"
" conv_fwd_cpu: ForwardConvolution+Bias+ReLU+AtomicAdd
\n
"
" conv1d_bwd_data: BackwardConvolution data 1 dim
\n
"
" conv2d_bwd_data: BackwardConvolution data 2 dim
\n
"
" conv3d_bwd_data: BackwardConvolution data 3 dim
\n
"
" reduce: Reduce
\n
"
" conv2d_bwd_weight: Backward Weight Convolution 2d
\n
"
);
// clang-format on
}
return
0
;
=======
print_helper_message
();
return
0
;
}
>>>>>>>
origin
/
develop
}
test/CMakeLists.txt
View file @
4b448373
...
...
@@ -13,7 +13,9 @@ function(add_test_executable TEST_NAME)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
if
(
NOT CK_NOGPU
)
rocm_install
(
TARGETS
${
TEST_NAME
}
COMPONENT tests
)
endif
()
endfunction
(
add_test_executable TEST_NAME
)
include
(
GoogleTest
)
...
...
@@ -27,7 +29,9 @@ function(add_gtest_executable TEST_NAME)
target_compile_options
(
${
TEST_NAME
}
PRIVATE -Wno-global-constructors -Wno-undef
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE gtest_main
)
gtest_discover_tests
(
${
TEST_NAME
}
)
if
(
NOT CK_NOGPU
)
rocm_install
(
TARGETS
${
TEST_NAME
}
COMPONENT tests
)
endif
()
endfunction
(
add_gtest_executable TEST_NAME
)
...
...
@@ -47,9 +51,5 @@ add_subdirectory(conv2d_bwd_weight)
add_subdirectory
(
convnd_bwd_weight
)
add_subdirectory
(
convnd_bwd_data
)
add_subdirectory
(
block_to_ctile_map
)
<<<<<<< HEAD
add_subdirectory
(
cpu_ukernel
)
# DONOT add client_app, that is tested via CI independently
=======
add_subdirectory
(
softmax
)
>>>>>>> origin/develop
add_subdirectory
(
cpu_ukernel
)
test/cpu_ukernel/cpu_gemm_uk.cpp
View file @
4b448373
...
...
@@ -6,14 +6,16 @@
#include <sstream>
#include <tuple>
#include <memory>
#include <half.hpp>
#include <omp.h>
#include "host_tensor.hpp"
#include "device.hpp"
#include "config.hpp"
#include "print.hpp"
#include "cpuid.hpp"
#include "threadwise_gemm_avx2.hpp"
#include <string.h>
#include <chrono>
#include "ck/ck.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/utility/print.hpp"
#include "ck/utility/cpuid.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#define ITERATE_THREAD_GEMM_AVX2_MXN_6X16_INSTANCE(FA, FB, FC, TA, TB, NT) \
ck::cpu::ThreadwiseGemmAvx2_MxN_6x16<FA, FB, FC, 6, 16, TA, TB, NT>, \
...
...
@@ -294,16 +296,16 @@ void test_ukernel(ukenrel_t uk,
invoke_uk
(
param
,
private_c
);
}
WallTimer
timer
;
timer
.
Start
();
auto
mStart
=
std
::
chrono
::
high_resolution_clock
::
now
();
for
(
int
i
=
0
;
i
<
repeat
;
i
++
)
{
invoke_uk
(
param
,
private_c
);
}
timer
.
End
();
auto
mStop
=
std
::
chrono
::
high_resolution_clock
::
now
();
us
+=
timer
.
GetElapsedTime
()
*
1e3
/
repeat
;
us
+=
static_cast
<
float
>
(
std
::
chrono
::
duration_cast
<
std
::
chrono
::
microseconds
>
(
mStop
-
mStart
).
count
())
/
repeat
;
memset
(
private_c
,
0
,
m
*
n
*
sizeof
(
float
));
invoke_uk
(
param
,
private_c
);
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment