Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4b448373
"docs/git@developer.sourcefind.cn:hehl2/torchaudio.git" did not exist on "0fafcb3eeca2c260c83366f585c1398ad6b7a6b7"
Commit
4b448373
authored
Jul 12, 2022
by
carlushuang
Browse files
fix bug on merge latest develop
parent
b79df771
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
186 additions
and
124 deletions
+186
-124
CMakeLists.txt
CMakeLists.txt
+5
-0
example/cpu_01_conv2d_fwd/cpu_conv2d_fwd.cpp
example/cpu_01_conv2d_fwd/cpu_conv2d_fwd.cpp
+12
-13
example/cpu_02_conv2d_fwd_bias_relu_add/cpu_conv2d_fwd_bias_relu_add.cpp
...conv2d_fwd_bias_relu_add/cpu_conv2d_fwd_bias_relu_add.cpp
+13
-14
include/ck/ck.hpp
include/ck/ck.hpp
+1
-2
include/ck/device_utility/device_prop.hpp
include/ck/device_utility/device_prop.hpp
+2
-0
include/ck/device_utility/hip_check_error.hpp
include/ck/device_utility/hip_check_error.hpp
+2
-0
include/ck/device_utility/kernel_launch.hpp
include/ck/device_utility/kernel_launch.hpp
+32
-0
include/ck/device_utility/xdnn_desc.hpp
include/ck/device_utility/xdnn_desc.hpp
+1
-0
include/ck/tensor_operation/cpu/block/blockwise_gemm_avx2.hpp
...ude/ck/tensor_operation/cpu/block/blockwise_gemm_avx2.hpp
+5
-5
include/ck/tensor_operation/cpu/device/device_base_cpu.hpp
include/ck/tensor_operation/cpu/device/device_base_cpu.hpp
+1
-1
include/ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp
...de/ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp
+4
-2
include/ck/tensor_operation/cpu/device/device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp
...device/device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp
...tion/cpu/device/device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp
...on/cpu/device/device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp
...tion/cpu/device/device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp
...ce_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp
..._convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp
...ce_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp
+14
-11
include/ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp
...nsor_operation/cpu/element/element_wise_operation_cpu.hpp
+1
-1
include/ck/tensor_operation/cpu/grid/gridwise_direct_conv_avx2.hpp
...k/tensor_operation/cpu/grid/gridwise_direct_conv_avx2.hpp
+9
-9
No files found.
CMakeLists.txt
View file @
4b448373
...
...
@@ -84,6 +84,7 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
message
(
STATUS
"CK_HIP_VERSION_PATCH overriden with
${
CK_OVERRIDE_HIP_VERSION_PATCH
}
"
)
endif
()
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
endif
()
## tidy
include
(
EnableCompilerWarnings
)
...
...
@@ -251,10 +252,12 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target
(
check COMMAND
${
CMAKE_CTEST_COMMAND
}
--output-on-failure -C
${
CMAKE_CFG_INTDIR
}
)
if
(
NOT CK_NOGPU
)
rocm_package_setup_component
(
tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests
# Prevent -static suffix on package name
)
endif
()
add_subdirectory
(
library
)
add_subdirectory
(
example
)
...
...
@@ -277,6 +280,7 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
if
(
NOT CK_NOGPU
)
rocm_install
(
FILES
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfig.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfigVersion.cmake"
...
...
@@ -293,3 +297,4 @@ rocm_create_package(
LDCONFIG
HEADER_ONLY
)
endif
()
example/cpu_01_conv2d_fwd/cpu_conv2d_fwd.cpp
View file @
4b448373
#include <sstream>
#include <iomanip>
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "tensor_layout.hpp"
#include "device_tensor.hpp"
#include "device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "reference_conv_fwd.hpp"
#include "element_wise_operation_cpu.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "envvar.hpp"
#include "xdnn_desc.hpp"
#include "ck/ck.hpp"
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
#include "ck/utility/dynamic_buffer_cpu.hpp"
#include "ck/utility/envvar.hpp"
#include "ck/device_utility/xdnn_desc.hpp"
#include <omp.h>
#define AVX2_DATA_ALIGNMENT 32
...
...
example/cpu_02_conv2d_fwd_bias_relu_add/cpu_conv2d_fwd_bias_relu_add.cpp
View file @
4b448373
#include <sstream>
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "tensor_layout.hpp"
#include "device_tensor.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "reference_conv_fwd_bias_activation_add.hpp"
#include "reference_conv_fwd_bias_activation.hpp"
#include "element_wise_operation_cpu.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "envvar.hpp"
#include "xdnn_desc.hpp"
#include "ck/ck.hpp"
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp"
#include "ck/utility/dynamic_buffer_cpu.hpp"
#include "ck/utility/envvar.hpp"
#include "ck/device_utility/xdnn_desc.hpp"
#include <omp.h>
#define AVX2_DATA_ALIGNMENT 32
...
...
include/ck/ck.hpp
View file @
4b448373
...
...
@@ -3,11 +3,10 @@
#pragma once
#include "ck/options.hpp"
#ifdef CK_NOGPU
#define __host__
#define __device__
#include <stdint.h>
#else
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
...
...
include/ck/device_utility/device_prop.hpp
View file @
4b448373
...
...
@@ -2,6 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#ifndef CK_NOGPU
#include <string>
#include <map>
...
...
@@ -52,3 +53,4 @@ inline std::string get_device_name()
}
}
// namespace ck
#endif
include/ck/device_utility/hip_check_error.hpp
View file @
4b448373
...
...
@@ -2,6 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#ifndef CK_NOGPU
#include <hip/hip_runtime.h>
...
...
@@ -15,3 +16,4 @@ inline void hip_check_error(hipError_t x)
throw
std
::
runtime_error
(
ss
.
str
());
}
}
#endif
include/ck/device_utility/kernel_launch.hpp
View file @
4b448373
...
...
@@ -3,6 +3,9 @@
#pragma once
#include <chrono>
#ifndef CK_NOGPU
#include <hip/hip_runtime.h>
#include "ck/ck.hpp"
...
...
@@ -72,3 +75,32 @@ float launch_and_time_kernel(const StreamConfig& stream_config,
return
0
;
#endif
}
#endif
// Calls a CPU kernel callable once with the supplied arguments.
//
// kernel: callable that is invoked as kernel(args...).
// args:   arguments received by value and passed on to the callable.
template <class... Args, class F>
void launch_cpu_kernel(F kernel, Args... args)
{
    kernel(args...);
}
// Runs a CPU kernel callable repeatedly and reports the average wall-clock
// time of a single invocation, in milliseconds.
//
// kernel:  callable that is invoked as kernel(args...).
// nrepeat: number of timed invocations. When it is zero or negative only the
//          warm-up invocations are executed and 0 is returned.
// args:    arguments received by value and passed to every invocation.
//
// Returns the elapsed time of the timed loop divided by nrepeat.
template <typename... Args, typename F>
float launch_and_time_cpu_kernel(F kernel, int nrepeat, Args... args)
{
    // Untimed warm-up invocations. They run unconditionally so the kernel's
    // side effects are produced even when no timing is requested.
    constexpr int nwarmup = 3;
    for(int i = 0; i < nwarmup; ++i)
    {
        kernel(args...);
    }

    // Nothing to time: dividing by zero would yield NaN, and a negative count
    // would yield a meaningless negative average.
    if(nrepeat <= 0)
    {
        return 0.0f;
    }

    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by system clock adjustments.
    const auto start = std::chrono::steady_clock::now();
    for(int i = 0; i < nrepeat; ++i)
    {
        kernel(args...);
    }
    const auto stop = std::chrono::steady_clock::now();

    // Convert directly to floating-point milliseconds instead of truncating to
    // whole microseconds first, which loses precision for short kernels.
    const std::chrono::duration<float, std::milli> elapsed_ms = stop - start;

    return elapsed_ms.count() / static_cast<float>(nrepeat);
}
include/ck/device_utility/xdnn_desc.hpp
View file @
4b448373
...
...
@@ -3,6 +3,7 @@
#include <string>
#include <vector>
#include <functional>
#include <string.h>
#define XDNN_OK 0
#define XDNN_FAIL 1
...
...
include/ck/tensor_operation/cpu/block/blockwise_gemm_avx2.hpp
View file @
4b448373
#ifndef CK_BLOCKWISE_GEMM_AVX2_HPP
#define CK_BLOCKWISE_GEMM_AVX2_HPP
#include "common_header.hpp"
#include "multi_index_transform_helper.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "
ck/utility/
common_header.hpp"
#include "
ck/tensor_description/
multi_index_transform_helper.hpp"
#include "
ck/tensor_description/
tensor_descriptor.hpp"
#include "
ck/tensor_description/
tensor_descriptor_helper.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_gemm_avx2.hpp"
namespace
ck
{
namespace
cpu
{
...
...
include/ck/tensor_operation/cpu/device/device_base_cpu.hpp
View file @
4b448373
...
...
@@ -2,7 +2,7 @@
#define DEVICE_BASE_CPU_HPP
#include <string>
#include "stream_config.hpp"
#include "
ck/
stream_config.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp
View file @
4b448373
...
...
@@ -2,8 +2,10 @@
#define DEVICE_CONV_FWD_CPU_HPP
#include <iostream>
#include "device_base_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include <memory>
#include <vector>
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_direct_conv_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_direct_conv_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_bias_activation_add_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_gemm_bias_activation_add_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_bias_activation_add_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_gemm_bias_activation_add_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp
View file @
4b448373
...
...
@@ -4,17 +4,20 @@
#include <iostream>
#include <sstream>
#include <numeric>
#include "device.hpp"
#include "device_base_cpu.hpp"
#include "device_conv_fwd_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "common_header.hpp"
#include "../../gpu/device/tensor_layout.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "gridwise_gemm_bias_activation_add_avx2.hpp"
#include "threadwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include <memory>
#include <vector>
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/tensor_operation/cpu/device/device_base_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_conv_fwd_cpu.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/cpu/grid/gridwise_gemm_bias_activation_add_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp
View file @
4b448373
#pragma once
#include "data_type_cpu.hpp"
#include "
ck/utility/
data_type_cpu.hpp"
namespace
ck
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/cpu/grid/gridwise_direct_conv_avx2.hpp
View file @
4b448373
#ifndef CK_GRIDWISE_DIRECT_CONV_AVX2_HPP
#define CK_GRIDWISE_DIRECT_CONV_AVX2_HPP
#include "common_header.hpp"
#include "multi_index_transform_helper.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "blockwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "envvar.hpp"
#include "
ck/utility/
common_header.hpp"
#include "
ck/tensor_description/
multi_index_transform_helper.hpp"
#include "
ck/tensor_description/
tensor_descriptor.hpp"
#include "
ck/tensor_description/
tensor_descriptor_helper.hpp"
#include "
ck/tensor_operation/cpu/block/
blockwise_gemm_avx2.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_tensor_slice_transfer_avx2.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include "
ck/utility/
dynamic_buffer_cpu.hpp"
#include "
ck/utility/
envvar.hpp"
#include <utility>
#include <unistd.h>
#include <omp.h>
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment