Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
4b448373
Commit
4b448373
authored
Jul 12, 2022
by
carlushuang
Browse files
fix bug on merge latest develop
parent
b79df771
Changes
46
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
167 additions
and
116 deletions
+167
-116
include/ck/tensor_operation/cpu/grid/gridwise_gemm_avx2.hpp
include/ck/tensor_operation/cpu/grid/gridwise_gemm_avx2.hpp
+9
-9
include/ck/tensor_operation/cpu/grid/gridwise_gemm_bias_activation_add_avx2.hpp
...ation/cpu/grid/gridwise_gemm_bias_activation_add_avx2.hpp
+9
-9
include/ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp
...e/ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp
+4
-4
include/ck/tensor_operation/cpu/thread/threadwise_gemm_param.hpp
.../ck/tensor_operation/cpu/thread/threadwise_gemm_param.hpp
+2
-2
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp
...tion/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp
+7
-7
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp
.../threadwise_tensor_slice_transfer_avx2_specialization.hpp
+9
-9
include/ck/utility/common_header.hpp
include/ck/utility/common_header.hpp
+0
-31
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+0
-3
include/ck/utility/data_type_cpu.hpp
include/ck/utility/data_type_cpu.hpp
+0
-1
include/ck/utility/dynamic_buffer_cpu.hpp
include/ck/utility/dynamic_buffer_cpu.hpp
+1
-1
include/ck/utility/math.hpp
include/ck/utility/math.hpp
+6
-0
library/CMakeLists.txt
library/CMakeLists.txt
+2
-0
library/include/ck/library/host_tensor/device_memory.hpp
library/include/ck/library/host_tensor/device_memory.hpp
+19
-0
library/src/host_tensor/CMakeLists.txt
library/src/host_tensor/CMakeLists.txt
+15
-0
library/src/host_tensor/device_memory.cpp
library/src/host_tensor/device_memory.cpp
+40
-0
library/src/host_tensor/host_tensor.cpp
library/src/host_tensor/host_tensor.cpp
+1
-1
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
...vice_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
+7
-6
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_instance.cpp
...2d_fwd/device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_instance.cpp
+12
-11
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
..._fwd/device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
+12
-11
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_fwd_avx2_nhwc_yxck_nhwk_instance.cpp
...2d_fwd/device_conv2d_fwd_avx2_nhwc_yxck_nhwk_instance.cpp
+12
-11
No files found.
include/ck/tensor_operation/cpu/grid/gridwise_gemm_avx2.hpp
View file @
4b448373
#ifndef CK_GRIDWISE_GEMM_AVX2_HPP
#ifndef CK_GRIDWISE_GEMM_AVX2_HPP
#define CK_GRIDWISE_GEMM_AVX2_HPP
#define CK_GRIDWISE_GEMM_AVX2_HPP
#include "common_header.hpp"
#include "
ck/utility/
common_header.hpp"
#include "multi_index_transform_helper.hpp"
#include "
ck/tensor_description/
multi_index_transform_helper.hpp"
#include "tensor_descriptor.hpp"
#include "
ck/tensor_description/
tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "
ck/tensor_description/
tensor_descriptor_helper.hpp"
#include "blockwise_gemm_avx2.hpp"
#include "
ck/tensor_operation/cpu/block/
blockwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_tensor_slice_transfer_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "
ck/utility/
dynamic_buffer_cpu.hpp"
#include "envvar.hpp"
#include "
ck/utility/
envvar.hpp"
#include <utility>
#include <utility>
#include <unistd.h>
#include <unistd.h>
#include <omp.h>
#include <omp.h>
...
...
include/ck/tensor_operation/cpu/grid/gridwise_gemm_bias_activation_add_avx2.hpp
View file @
4b448373
#ifndef CK_GRIDWISE_GEMM_BIAS_ACTIVATION_ADD_AVX2_HPP
#ifndef CK_GRIDWISE_GEMM_BIAS_ACTIVATION_ADD_AVX2_HPP
#define CK_GRIDWISE_GEMM_BIAS_ACTIVATION_ADD_AVX2_HPP
#define CK_GRIDWISE_GEMM_BIAS_ACTIVATION_ADD_AVX2_HPP
#include "common_header.hpp"
#include "
ck/utility/
common_header.hpp"
#include "multi_index_transform_helper.hpp"
#include "
ck/tensor_description/
multi_index_transform_helper.hpp"
#include "tensor_descriptor.hpp"
#include "
ck/tensor_description/
tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "
ck/tensor_description/
tensor_descriptor_helper.hpp"
#include "blockwise_gemm_avx2.hpp"
#include "
ck/tensor_operation/cpu/block/
blockwise_gemm_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_tensor_slice_transfer_avx2.hpp"
#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_tensor_slice_transfer_avx2_specialization.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "
ck/utility/
dynamic_buffer_cpu.hpp"
#include "envvar.hpp"
#include "
ck/utility/
envvar.hpp"
#include <utility>
#include <utility>
#include <unistd.h>
#include <unistd.h>
#include <omp.h>
#include <omp.h>
...
...
include/ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp
View file @
4b448373
...
@@ -5,10 +5,10 @@
...
@@ -5,10 +5,10 @@
#if CK_USE_X86_INLINE_ASM == 0
#if CK_USE_X86_INLINE_ASM == 0
#include <immintrin.h>
#include <immintrin.h>
#endif
#endif
#include "common_header.hpp"
#include "
ck/utility/
common_header.hpp"
#include "
../..
/gpu/device/tensor_layout.hpp"
#include "
ck/tensor_operation
/gpu/device/tensor_layout.hpp"
#include "math.hpp"
#include "
ck/utility/
math.hpp"
#include "threadwise_gemm_param.hpp"
#include "
ck/tensor_operation/cpu/thread/
threadwise_gemm_param.hpp"
namespace
ck
{
namespace
ck
{
namespace
cpu
{
namespace
cpu
{
...
...
include/ck/tensor_operation/cpu/thread/threadwise_gemm_param.hpp
View file @
4b448373
#ifndef CK_THREADWISE_GEMM_PARAM_HPP
#ifndef CK_THREADWISE_GEMM_PARAM_HPP
#define CK_THREADWISE_GEMM_PARAM_HPP
#define CK_THREADWISE_GEMM_PARAM_HPP
#include "common_header.hpp"
#include "
ck/utility/
common_header.hpp"
#include "math.hpp"
#include "
ck/utility/
math.hpp"
namespace
ck
{
namespace
ck
{
namespace
cpu
{
namespace
cpu
{
...
...
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp
View file @
4b448373
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_HPP
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_HPP
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_HPP
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_HPP
#include "common_header.hpp"
#include "
ck/utility/
common_header.hpp"
#include "data_type_cpu.hpp"
#include "
ck/utility/
data_type_cpu.hpp"
#include "
../..
/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "
ck/tensor_operation
/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "tensor_descriptor.hpp"
#include "
ck/tensor_description/
tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "
ck/tensor_description/
tensor_descriptor_helper.hpp"
#include "tensor_space_filling_curve.hpp"
#include "
ck/tensor_description/
tensor_space_filling_curve.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "
ck/utility/
dynamic_buffer_cpu.hpp"
#include <immintrin.h>
#include <immintrin.h>
namespace
ck
{
namespace
ck
{
...
...
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp
View file @
4b448373
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_SPECIALIZED_HPP
#ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_SPECIALIZED_HPP
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_SPECIALIZED_HPP
#define CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_SPECIALIZED_HPP
#include "common_header.hpp"
#include "
ck/utility/
common_header.hpp"
#include "data_type_cpu.hpp"
#include "
ck/utility/
data_type_cpu.hpp"
#include "
../..
/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "
ck/tensor_operation
/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "tensor_descriptor.hpp"
#include "
ck/tensor_description/
tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "
ck/tensor_description/
tensor_descriptor_helper.hpp"
#include "tensor_space_filling_curve.hpp"
#include "
ck/tensor_description/
tensor_space_filling_curve.hpp"
#include "dynamic_buffer_cpu.hpp"
#include "
ck/utility/
dynamic_buffer_cpu.hpp"
#include "element_wise_operation_cpu.hpp"
#include "
ck/tensor_operation/cpu/element/
element_wise_operation_cpu.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "
ck/tensor_operation/cpu/device/
convolution_forward_specialization_cpu.hpp"
#include <immintrin.h>
#include <immintrin.h>
namespace
ck
{
namespace
ck
{
...
...
include/ck/utility/common_header.hpp
View file @
4b448373
...
@@ -2,37 +2,6 @@
...
@@ -2,37 +2,6 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
#include "config.hpp"
#include "array.hpp"
#include "container_helper.hpp"
#include "statically_indexed_array.hpp"
#include "container_element_picker.hpp"
#include "multi_index.hpp"
#include "data_type.hpp"
#include "data_type_enum.hpp"
#include "data_type_enum_helper.hpp"
#include "functional.hpp"
#include "functional2.hpp"
#include "functional3.hpp"
#include "functional4.hpp"
#include "enable_if.hpp"
#include "ignore.hpp"
#include "integral_constant.hpp"
#include "math.hpp"
#include "number.hpp"
#include "sequence.hpp"
#include "sequence_helper.hpp"
#include "tuple.hpp"
#include "tuple_helper.hpp"
#include "type.hpp"
#include "magic_division.hpp"
#include "c_style_pointer_cast.hpp"
#include "is_known_at_compile_time.hpp"
#include "transpose_vectors.hpp"
#include "inner_product.hpp"
// #include "element_wise_operation.hpp"
#include "thread_group.hpp"
#include "debug.hpp"
#include "ck/ck.hpp"
#include "ck/ck.hpp"
#include "ck/utility/array.hpp"
#include "ck/utility/array.hpp"
...
...
include/ck/utility/data_type.hpp
View file @
4b448373
...
@@ -4,9 +4,6 @@
...
@@ -4,9 +4,6 @@
#pragma once
#pragma once
#include "ck/utility/statically_indexed_array.hpp"
#include "ck/utility/statically_indexed_array.hpp"
#ifdef CK_NOGPU
#include "half.hpp"
#endif
namespace
ck
{
namespace
ck
{
...
...
include/ck/utility/data_type_cpu.hpp
View file @
4b448373
#pragma once
#pragma once
#include <immintrin.h>
#include <immintrin.h>
#include "half.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/ck/utility/dynamic_buffer_cpu.hpp
View file @
4b448373
#ifndef CK_BUFFER_CPU_HPP
#ifndef CK_BUFFER_CPU_HPP
#define CK_BUFFER_CPU_HPP
#define CK_BUFFER_CPU_HPP
#include "c
onfig
.hpp"
#include "c
k/ck
.hpp"
#include "enable_if.hpp"
#include "enable_if.hpp"
#include "data_type_cpu.hpp"
#include "data_type_cpu.hpp"
...
...
include/ck/utility/math.hpp
View file @
4b448373
...
@@ -9,6 +9,10 @@
...
@@ -9,6 +9,10 @@
#include "type.hpp"
#include "type.hpp"
#include "enable_if.hpp"
#include "enable_if.hpp"
#ifndef CK_NOCPU
#include <math.h>
#endif
namespace
ck
{
namespace
ck
{
namespace
math
{
namespace
math
{
...
@@ -144,6 +148,7 @@ __host__ __device__ constexpr auto min(X x, Ys... ys)
...
@@ -144,6 +148,7 @@ __host__ __device__ constexpr auto min(X x, Ys... ys)
return
min
(
x
,
min
(
ys
...));
return
min
(
x
,
min
(
ys
...));
}
}
#ifndef CK_NOGPU
// disallow implicit type casting
// disallow implicit type casting
template
<
typename
T
>
template
<
typename
T
>
__device__
T
exp
(
T
x
);
__device__
T
exp
(
T
x
);
...
@@ -161,6 +166,7 @@ __device__ double exp<double>(double x)
...
@@ -161,6 +166,7 @@ __device__ double exp<double>(double x)
{
{
return
exp
(
x
);
return
exp
(
x
);
}
}
#endif
// greatest common divisor, aka highest common factor
// greatest common divisor, aka highest common factor
__host__
__device__
constexpr
index_t
gcd
(
index_t
x
,
index_t
y
)
__host__
__device__
constexpr
index_t
gcd
(
index_t
x
,
index_t
y
)
...
...
library/CMakeLists.txt
View file @
4b448373
if
(
NOT CK_NOGPU
)
add_subdirectory
(
src/tensor_operation_instance/gpu
)
add_subdirectory
(
src/tensor_operation_instance/gpu
)
endif
()
add_subdirectory
(
src/host_tensor
)
add_subdirectory
(
src/host_tensor
)
add_subdirectory
(
src/utility
)
add_subdirectory
(
src/utility
)
add_subdirectory
(
src/tensor_operation_instance/cpu
)
add_subdirectory
(
src/tensor_operation_instance/cpu
)
library/include/ck/library/host_tensor/device_memory.hpp
View file @
4b448373
...
@@ -3,6 +3,8 @@
...
@@ -3,6 +3,8 @@
#pragma once
#pragma once
#include <cstddef>
#ifndef CK_NOGPU
#include <hip/hip_runtime.h>
#include <hip/hip_runtime.h>
template
<
typename
T
>
template
<
typename
T
>
...
@@ -38,3 +40,20 @@ struct DeviceMem
...
@@ -38,3 +40,20 @@ struct DeviceMem
void
*
mpDeviceBuf
;
void
*
mpDeviceBuf
;
std
::
size_t
mMemSize
;
std
::
size_t
mMemSize
;
};
};
#endif
// Owner of a single aligned host-memory allocation.
//
// The constructor allocates `mem_size` bytes aligned to `alignment` (no
// allocation is made when `mem_size` is 0, in which case the buffer pointer is
// null) and the destructor releases the allocation with free().
//
// The object is the sole owner of `mpDeviceBuf`, so copying is disabled: an
// implicitly generated copy would leave two objects freeing the same pointer.
struct DeviceAlignedMemCPU
{
    DeviceAlignedMemCPU() = delete;

    // mem_size  : number of bytes to allocate (0 yields an empty buffer).
    // alignment : required alignment in bytes; must be a power of two.
    DeviceAlignedMemCPU(std::size_t mem_size, std::size_t alignment);

    // Non-copyable: the buffer has exactly one owner.
    DeviceAlignedMemCPU(const DeviceAlignedMemCPU&) = delete;
    DeviceAlignedMemCPU& operator=(const DeviceAlignedMemCPU&) = delete;

    // Returns the start of the owned buffer (nullptr for an empty buffer).
    void* GetDeviceBuffer();

    // Returns the buffer size in bytes, as passed to the constructor.
    std::size_t GetBufferSize();

    // Copies GetBufferSize() bytes from `p` into the owned buffer.
    void ToDevice(const void* p);

    // Copies GetBufferSize() bytes from the owned buffer into `p`.
    void FromDevice(void* p);

    // Fills the owned buffer with zero bytes.
    void SetZero();

    ~DeviceAlignedMemCPU();

    void* mpDeviceBuf;      // owned allocation, or nullptr when mMemSize == 0
    std::size_t mMemSize;   // size of the allocation in bytes
    std::size_t mAlignment; // alignment requested at construction, in bytes
};
library/src/host_tensor/CMakeLists.txt
View file @
4b448373
...
@@ -17,6 +17,7 @@ target_include_directories(host_tensor PUBLIC
...
@@ -17,6 +17,7 @@ target_include_directories(host_tensor PUBLIC
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host_tensor>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/host_tensor>"
)
)
if
(
NOT CK_NOGPU
)
rocm_install
(
rocm_install
(
TARGETS host_tensor
TARGETS host_tensor
EXPORT host_tensorTargets
EXPORT host_tensorTargets
...
@@ -28,5 +29,19 @@ rocm_install(
...
@@ -28,5 +29,19 @@ rocm_install(
NAMESPACE composable_kernel::
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
)
else
()
install
(
TARGETS host_tensor
EXPORT host_tensorTargets
LIBRARY DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
ARCHIVE DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
RUNTIME DESTINATION
${
CMAKE_INSTALL_BINDIR
}
INCLUDES DESTINATION
${
CMAKE_INSTALL_INCLUDEDIR
}
)
install
(
EXPORT host_tensorTargets
FILE composable_kernelhost_tensorTargets.cmake
NAMESPACE composable_kernel::
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
endif
()
clang_tidy_check
(
host_tensor
)
clang_tidy_check
(
host_tensor
)
library/src/host_tensor/device_memory.cpp
View file @
4b448373
...
@@ -4,6 +4,11 @@
...
@@ -4,6 +4,11 @@
#include "ck/device_utility/hip_check_error.hpp"
#include "ck/device_utility/hip_check_error.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#ifndef CK_NOGPU
DeviceMem
::
DeviceMem
(
std
::
size_t
mem_size
)
:
mMemSize
(
mem_size
)
DeviceMem
::
DeviceMem
(
std
::
size_t
mem_size
)
:
mMemSize
(
mem_size
)
{
{
hip_check_error
(
hipMalloc
(
static_cast
<
void
**>
(
&
mpDeviceBuf
),
mMemSize
));
hip_check_error
(
hipMalloc
(
static_cast
<
void
**>
(
&
mpDeviceBuf
),
mMemSize
));
...
@@ -26,3 +31,38 @@ void DeviceMem::FromDevice(void* p)
...
@@ -26,3 +31,38 @@ void DeviceMem::FromDevice(void* p)
void
DeviceMem
::
SetZero
()
{
hip_check_error
(
hipMemset
(
mpDeviceBuf
,
0
,
mMemSize
));
}
void
DeviceMem
::
SetZero
()
{
hip_check_error
(
hipMemset
(
mpDeviceBuf
,
0
,
mMemSize
));
}
DeviceMem
::~
DeviceMem
()
{
hip_check_error
(
hipFree
(
mpDeviceBuf
));
}
DeviceMem
::~
DeviceMem
()
{
hip_check_error
(
hipFree
(
mpDeviceBuf
));
}
#endif
// Allocates `mem_size` bytes of host memory aligned to `alignment` bytes.
//
// mem_size  : number of bytes to allocate; 0 produces an empty object whose
//             buffer pointer is nullptr.
// alignment : must be a power of two (posix_memalign additionally requires it
//             to be a multiple of sizeof(void*)).
//
// An allocation failure terminates the process. The check is done
// unconditionally because assert() is compiled out under NDEBUG, which would
// otherwise leave mpDeviceBuf unset and later handed to memcpy()/free().
DeviceAlignedMemCPU::DeviceAlignedMemCPU(std::size_t mem_size, std::size_t alignment)
    : mpDeviceBuf(nullptr), mMemSize(mem_size), mAlignment(alignment)
{
    if(mem_size == 0)
    {
        // Nothing to allocate; the buffer stays null.
        return;
    }

    assert(!(alignment == 0 || (alignment & (alignment - 1)))); // check pow of 2

    // TODO: posix only
    const int rtn = posix_memalign(&mpDeviceBuf, alignment, mem_size);
    assert(rtn == 0); // debug builds: report the failing expression
    if(rtn != 0)
    {
        // Release builds: never continue with an unusable buffer.
        abort();
    }
}
void
*
DeviceAlignedMemCPU
::
GetDeviceBuffer
()
{
return
mpDeviceBuf
;
}
std
::
size_t
DeviceAlignedMemCPU
::
GetBufferSize
()
{
return
mMemSize
;
}
// Copies mMemSize bytes from `p` into the owned buffer.
//
// p : source memory; must be readable for at least mMemSize bytes.
//
// An empty object (mMemSize == 0) holds a null buffer, and passing a null
// pointer to memcpy() is undefined even for a zero length, so that case
// returns without touching memory.
void DeviceAlignedMemCPU::ToDevice(const void* p)
{
    if(mMemSize == 0)
    {
        return;
    }

    memcpy(mpDeviceBuf, p, mMemSize);
}
// Copies mMemSize bytes from the owned buffer into `p`.
//
// p : destination memory; must be writable for at least mMemSize bytes.
//
// An empty object (mMemSize == 0) holds a null buffer, and passing a null
// pointer to memcpy() is undefined even for a zero length, so that case
// returns without touching memory.
void DeviceAlignedMemCPU::FromDevice(void* p)
{
    if(mMemSize == 0)
    {
        return;
    }

    memcpy(p, mpDeviceBuf, mMemSize);
}
// Overwrites the whole owned buffer (mMemSize bytes) with zero bytes.
//
// An empty object (mMemSize == 0) holds a null buffer, and passing a null
// pointer to memset() is undefined even for a zero length, so that case
// returns without touching memory.
void DeviceAlignedMemCPU::SetZero()
{
    if(mMemSize == 0)
    {
        return;
    }

    memset(mpDeviceBuf, 0, mMemSize);
}
// Releases the owned allocation.
DeviceAlignedMemCPU::~DeviceAlignedMemCPU()
{
    // free() is defined to do nothing for a null pointer, so the empty-buffer
    // case needs no separate branch.
    free(mpDeviceBuf);
}
library/src/host_tensor/host_tensor.cpp
View file @
4b448373
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cassert>
#include <cassert>
#include "ck/ck.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
void
HostTensorDescriptor
::
CalculateStrides
()
void
HostTensorDescriptor
::
CalculateStrides
()
...
...
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <stdlib.h>
#include <utility>
#include <utility>
#include "config.hpp"
#include <memory>
#include "convolution_forward_specialization_cpu.hpp"
#include "ck/ck.hpp"
#include "device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "element_wise_operation_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
#include "device_operation_instance.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -58,7 +59,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
...
@@ -58,7 +59,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk
(
void
add_device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
...
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <stdlib.h>
#include <utility>
#include <utility>
#include "convolution_forward_specialization_cpu.hpp"
#include <memory>
#include "config.hpp"
#include "ck/ck.hpp"
#include "device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "element_wise_operation_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp"
#include "device_operation_instance.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -64,7 +65,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
...
@@ -64,7 +65,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -85,7 +86,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk(std::vector<DeviceConvFwdPtr<PT,
...
@@ -85,7 +86,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk(std::vector<DeviceConvFwdPtr<PT,
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -106,7 +107,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c(
...
@@ -106,7 +107,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -143,7 +144,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt(
...
@@ -143,7 +144,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -164,7 +165,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_relu(
...
@@ -164,7 +165,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_relu(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -185,7 +186,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c_relu(
...
@@ -185,7 +186,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c_relu(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
...
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <stdlib.h>
#include <utility>
#include <utility>
#include "config.hpp"
#include <memory>
#include "convolution_forward_specialization_cpu.hpp"
#include "ck/ck.hpp"
#include "device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "element_wise_operation_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
#include "device_operation_instance.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -58,7 +59,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
...
@@ -58,7 +59,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -79,7 +80,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk(
...
@@ -79,7 +80,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -100,7 +101,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c(
...
@@ -100,7 +101,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -137,7 +138,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt(
...
@@ -137,7 +138,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -158,7 +159,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_relu(
...
@@ -158,7 +159,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_relu(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -179,7 +180,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c_relu(
...
@@ -179,7 +180,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c_relu(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
...
library/src/tensor_operation_instance/cpu/conv2d_fwd/device_conv2d_fwd_avx2_nhwc_yxck_nhwk_instance.cpp
View file @
4b448373
#include <stdlib.h>
#include <stdlib.h>
#include <utility>
#include <utility>
#include "config.hpp"
#include <memory>
#include "convolution_forward_specialization_cpu.hpp"
#include "ck/ck.hpp"
#include "device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "element_wise_operation_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp"
#include "device_operation_instance.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -56,7 +57,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
...
@@ -56,7 +57,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -77,7 +78,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk(std::vector<DeviceConvFwdPtr<PT,
...
@@ -77,7 +78,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk(std::vector<DeviceConvFwdPtr<PT,
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c
(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -98,7 +99,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c(
...
@@ -98,7 +99,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt
(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
PT
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -136,7 +137,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt(
...
@@ -136,7 +137,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -157,7 +158,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_relu(
...
@@ -157,7 +158,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_relu(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
@@ -178,7 +179,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c_relu(
...
@@ -178,7 +179,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c_relu(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt_relu
(
void
add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt_relu
(
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
std
::
vector
<
DeviceConvFwdPtr
<
PT
,
PT
,
Relu
>>&
instances
)
{
{
ck
::
tensor_operation
::
device
::
add_device_operation_instances
(
ck
::
tensor_operation
::
device
::
instance
::
add_device_operation_instances
(
instances
,
instances
,
std
::
make_tuple
(
std
::
make_tuple
(
// clang-format off
// clang-format off
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment