"...git@developer.sourcefind.cn:modelzoo/unet_paddle.git" did not exist on "0d97cc8c98fafa784d0fa9d17fd71d7b0d4683ff"
Commit 4b448373 authored by carlushuang

fix bug on merge latest develop

parent b79df771
 #ifndef CK_GRIDWISE_GEMM_AVX2_HPP
 #define CK_GRIDWISE_GEMM_AVX2_HPP
-#include "common_header.hpp"
-#include "multi_index_transform_helper.hpp"
-#include "tensor_descriptor.hpp"
-#include "tensor_descriptor_helper.hpp"
-#include "blockwise_gemm_avx2.hpp"
-#include "threadwise_tensor_slice_transfer_avx2.hpp"
-#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
-#include "dynamic_buffer_cpu.hpp"
-#include "envvar.hpp"
+#include "ck/utility/common_header.hpp"
+#include "ck/tensor_description/multi_index_transform_helper.hpp"
+#include "ck/tensor_description/tensor_descriptor.hpp"
+#include "ck/tensor_description/tensor_descriptor_helper.hpp"
+#include "ck/tensor_operation/cpu/block/blockwise_gemm_avx2.hpp"
+#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp"
+#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
+#include "ck/utility/dynamic_buffer_cpu.hpp"
+#include "ck/utility/envvar.hpp"
 #include <utility>
 #include <unistd.h>
 #include <omp.h>
......
 #ifndef CK_GRIDWISE_GEMM_BIAS_ACTIVATION_ADD_AVX2_HPP
 #define CK_GRIDWISE_GEMM_BIAS_ACTIVATION_ADD_AVX2_HPP
-#include "common_header.hpp"
-#include "multi_index_transform_helper.hpp"
-#include "tensor_descriptor.hpp"
-#include "tensor_descriptor_helper.hpp"
-#include "blockwise_gemm_avx2.hpp"
-#include "threadwise_tensor_slice_transfer_avx2.hpp"
-#include "threadwise_tensor_slice_transfer_avx2_specialization.hpp"
-#include "dynamic_buffer_cpu.hpp"
-#include "envvar.hpp"
+#include "ck/utility/common_header.hpp"
+#include "ck/tensor_description/multi_index_transform_helper.hpp"
+#include "ck/tensor_description/tensor_descriptor.hpp"
+#include "ck/tensor_description/tensor_descriptor_helper.hpp"
+#include "ck/tensor_operation/cpu/block/blockwise_gemm_avx2.hpp"
+#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp"
+#include "ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp"
+#include "ck/utility/dynamic_buffer_cpu.hpp"
+#include "ck/utility/envvar.hpp"
 #include <utility>
 #include <unistd.h>
 #include <omp.h>
......
@@ -5,10 +5,10 @@
 #if CK_USE_X86_INLINE_ASM == 0
 #include <immintrin.h>
 #endif
-#include "common_header.hpp"
-#include "../../gpu/device/tensor_layout.hpp"
-#include "math.hpp"
-#include "threadwise_gemm_param.hpp"
+#include "ck/utility/common_header.hpp"
+#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
+#include "ck/utility/math.hpp"
+#include "ck/tensor_operation/cpu/thread/threadwise_gemm_param.hpp"
 namespace ck {
 namespace cpu {
......
 #ifndef CK_THREADWISE_GEMM_PARAM_HPP
 #define CK_THREADWISE_GEMM_PARAM_HPP
-#include "common_header.hpp"
-#include "math.hpp"
+#include "ck/utility/common_header.hpp"
+#include "ck/utility/math.hpp"
 namespace ck {
 namespace cpu {
......
 #ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_HPP
 #define CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_HPP
-#include "common_header.hpp"
-#include "data_type_cpu.hpp"
-#include "../../gpu/thread/threadwise_tensor_slice_transfer.hpp"
-#include "tensor_descriptor.hpp"
-#include "tensor_descriptor_helper.hpp"
-#include "tensor_space_filling_curve.hpp"
-#include "dynamic_buffer_cpu.hpp"
+#include "ck/utility/common_header.hpp"
+#include "ck/utility/data_type_cpu.hpp"
+#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
+#include "ck/tensor_description/tensor_descriptor.hpp"
+#include "ck/tensor_description/tensor_descriptor_helper.hpp"
+#include "ck/tensor_description/tensor_space_filling_curve.hpp"
+#include "ck/utility/dynamic_buffer_cpu.hpp"
 #include <immintrin.h>
 namespace ck {
......
 #ifndef CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_SPECIALIZED_HPP
 #define CK_THREADWISE_TENSOR_SLICE_TRANSFER_AVX2_SPECIALIZED_HPP
-#include "common_header.hpp"
-#include "data_type_cpu.hpp"
-#include "../../gpu/thread/threadwise_tensor_slice_transfer.hpp"
-#include "tensor_descriptor.hpp"
-#include "tensor_descriptor_helper.hpp"
-#include "tensor_space_filling_curve.hpp"
-#include "dynamic_buffer_cpu.hpp"
-#include "element_wise_operation_cpu.hpp"
-#include "convolution_forward_specialization_cpu.hpp"
+#include "ck/utility/common_header.hpp"
+#include "ck/utility/data_type_cpu.hpp"
+#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
+#include "ck/tensor_description/tensor_descriptor.hpp"
+#include "ck/tensor_description/tensor_descriptor_helper.hpp"
+#include "ck/tensor_description/tensor_space_filling_curve.hpp"
+#include "ck/utility/dynamic_buffer_cpu.hpp"
+#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
+#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
 #include <immintrin.h>
 namespace ck {
......
@@ -2,37 +2,6 @@
 // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #pragma once
-#include "config.hpp"
-#include "array.hpp"
-#include "container_helper.hpp"
-#include "statically_indexed_array.hpp"
-#include "container_element_picker.hpp"
-#include "multi_index.hpp"
-#include "data_type.hpp"
-#include "data_type_enum.hpp"
-#include "data_type_enum_helper.hpp"
-#include "functional.hpp"
-#include "functional2.hpp"
-#include "functional3.hpp"
-#include "functional4.hpp"
-#include "enable_if.hpp"
-#include "ignore.hpp"
-#include "integral_constant.hpp"
-#include "math.hpp"
-#include "number.hpp"
-#include "sequence.hpp"
-#include "sequence_helper.hpp"
-#include "tuple.hpp"
-#include "tuple_helper.hpp"
-#include "type.hpp"
-#include "magic_division.hpp"
-#include "c_style_pointer_cast.hpp"
-#include "is_known_at_compile_time.hpp"
-#include "transpose_vectors.hpp"
-#include "inner_product.hpp"
-// #include "element_wise_operation.hpp"
-#include "thread_group.hpp"
-#include "debug.hpp"
 #include "ck/ck.hpp"
 #include "ck/utility/array.hpp"
......
@@ -4,9 +4,6 @@
 #pragma once
 #include "ck/utility/statically_indexed_array.hpp"
-#ifdef CK_NOGPU
-#include "half.hpp"
-#endif
 namespace ck {
......
 #pragma once
 #include <immintrin.h>
-#include "half.hpp"
 namespace ck {
......
 #ifndef CK_BUFFER_CPU_HPP
 #define CK_BUFFER_CPU_HPP
-#include "config.hpp"
+#include "ck/ck.hpp"
 #include "enable_if.hpp"
 #include "data_type_cpu.hpp"
......
@@ -9,6 +9,10 @@
 #include "type.hpp"
 #include "enable_if.hpp"
+#ifndef CK_NOCPU
+#include <math.h>
+#endif
 namespace ck {
 namespace math {
@@ -144,6 +148,7 @@ __host__ __device__ constexpr auto min(X x, Ys... ys)
     return min(x, min(ys...));
 }
+#ifndef CK_NOGPU
 // disallow implicit type casting
 template <typename T>
 __device__ T exp(T x);
@@ -161,6 +166,7 @@ __device__ double exp<double>(double x)
 {
     return exp(x);
 }
+#endif
 // greatest common divisor, aka highest common factor
 __host__ __device__ constexpr index_t gcd(index_t x, index_t y)
......
+if(NOT CK_NOGPU)
 add_subdirectory(src/tensor_operation_instance/gpu)
+endif()
 add_subdirectory(src/host_tensor)
 add_subdirectory(src/utility)
 add_subdirectory(src/tensor_operation_instance/cpu)
@@ -3,6 +3,8 @@
 #pragma once
+#include <cstddef>
+#ifndef CK_NOGPU
 #include <hip/hip_runtime.h>
 template <typename T>
@@ -38,3 +40,20 @@ struct DeviceMem
     void* mpDeviceBuf;
     std::size_t mMemSize;
 };
+#endif
+struct DeviceAlignedMemCPU
+{
+    DeviceAlignedMemCPU() = delete;
+    DeviceAlignedMemCPU(std::size_t mem_size, std::size_t alignment);
+    void* GetDeviceBuffer();
+    std::size_t GetBufferSize();
+    void ToDevice(const void* p);
+    void FromDevice(void* p);
+    void SetZero();
+    ~DeviceAlignedMemCPU();
+    void* mpDeviceBuf;
+    std::size_t mMemSize;
+    std::size_t mAlignment;
+};
@@ -17,6 +17,7 @@ target_include_directories(host_tensor PUBLIC
     "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/host_tensor>"
 )
+if(NOT CK_NOGPU)
 rocm_install(
     TARGETS host_tensor
     EXPORT host_tensorTargets
@@ -28,5 +29,19 @@ rocm_install(
     NAMESPACE composable_kernel::
     DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
 )
+else()
+    install(TARGETS host_tensor
+        EXPORT host_tensorTargets
+        LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+    )
+    install(EXPORT host_tensorTargets
+        FILE composable_kernelhost_tensorTargets.cmake
+        NAMESPACE composable_kernel::
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
+    )
+endif()
 clang_tidy_check(host_tensor)
@@ -4,6 +4,11 @@
 #include "ck/device_utility/hip_check_error.hpp"
 #include "ck/library/host_tensor/device_memory.hpp"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef CK_NOGPU
 DeviceMem::DeviceMem(std::size_t mem_size) : mMemSize(mem_size)
 {
     hip_check_error(hipMalloc(static_cast<void**>(&mpDeviceBuf), mMemSize));
@@ -26,3 +31,38 @@ void DeviceMem::FromDevice(void* p)
 void DeviceMem::SetZero() { hip_check_error(hipMemset(mpDeviceBuf, 0, mMemSize)); }
 DeviceMem::~DeviceMem() { hip_check_error(hipFree(mpDeviceBuf)); }
+#endif
+DeviceAlignedMemCPU::DeviceAlignedMemCPU(std::size_t mem_size, std::size_t alignment)
+    : mMemSize(mem_size), mAlignment(alignment)
+{
+    if(mem_size == 0)
+    {
+        mpDeviceBuf = nullptr;
+    }
+    else
+    {
+        assert(!(alignment == 0 || (alignment & (alignment - 1)))); // check pow of 2
+        // TODO: posix only
+        int rtn = posix_memalign(&mpDeviceBuf, alignment, mem_size);
+        assert(rtn == 0);
+    }
+}
+void* DeviceAlignedMemCPU::GetDeviceBuffer() { return mpDeviceBuf; }
+std::size_t DeviceAlignedMemCPU::GetBufferSize() { return mMemSize; }
+void DeviceAlignedMemCPU::ToDevice(const void* p) { memcpy(mpDeviceBuf, p, mMemSize); }
+void DeviceAlignedMemCPU::FromDevice(void* p) { memcpy(p, mpDeviceBuf, mMemSize); }
+void DeviceAlignedMemCPU::SetZero() { memset(mpDeviceBuf, 0, mMemSize); }
+DeviceAlignedMemCPU::~DeviceAlignedMemCPU()
+{
+    if(mpDeviceBuf != nullptr)
+        free(mpDeviceBuf);
+}
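For context, the new DeviceAlignedMemCPU above mirrors the DeviceMem interface on the CPU-only (CK_NOGPU) path: ToDevice/FromDevice are plain memcpy calls into an aligned host buffer allocated with posix_memalign. The following is a minimal usage sketch, not code from this commit; the buffer size, alignment value, and surrounding main() are illustrative, and the header path is assumed from the include shown in the diff above.

// Illustrative sketch only (not part of this commit): exercising DeviceAlignedMemCPU.
#include <cstddef>
#include <vector>
#include "ck/library/host_tensor/device_memory.hpp" // header path assumed from the diff above

int main()
{
    const std::size_t n         = 1024;
    const std::size_t alignment = 64; // must be non-zero and a power of two (asserted in the ctor)

    std::vector<float> host_in(n, 1.0f);
    std::vector<float> host_out(n, 0.0f);

    // "Device" buffer that actually lives in aligned host memory on the CPU path.
    DeviceAlignedMemCPU buf(n * sizeof(float), alignment);

    buf.ToDevice(host_in.data());    // memcpy of mMemSize bytes into the aligned buffer
    // ... run an AVX2 kernel against buf.GetDeviceBuffer() here ...
    buf.FromDevice(host_out.data()); // memcpy the result back out
    return 0;
}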
@@ -2,7 +2,7 @@
 // Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
 #include <cassert>
#include "ck/ck.hpp"
#include "ck/library/host_tensor/host_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
void HostTensorDescriptor::CalculateStrides() void HostTensorDescriptor::CalculateStrides()
......
 #include <stdlib.h>
 #include <utility>
-#include "config.hpp"
-#include "convolution_forward_specialization_cpu.hpp"
-#include "device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
-#include "element_wise_operation_cpu.hpp"
-#include "device_operation_instance.hpp"
+#include <memory>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
+#include "ck/tensor_operation/cpu/device/device_convnd_direct_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
+#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
 namespace ck {
 namespace tensor_operation {
@@ -58,7 +59,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
 void add_device_conv2d_direct_fwd_avx2_nhwc_kyxck8_nhwk(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
......
 #include <stdlib.h>
 #include <utility>
-#include "convolution_forward_specialization_cpu.hpp"
-#include "config.hpp"
-#include "device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp"
-#include "element_wise_operation_cpu.hpp"
-#include "device_operation_instance.hpp"
+#include <memory>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
+#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxc_nhwk.hpp"
+#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
 namespace ck {
 namespace tensor_operation {
@@ -64,7 +65,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
 void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk(std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -85,7 +86,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk(std::vector<DeviceConvFwdPtr<PT,
 void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -106,7 +107,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c(
 void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -143,7 +144,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt(
 void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -164,7 +165,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_relu(
 void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -185,7 +186,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_local_c_relu(
 void add_device_conv2d_fwd_avx2_nhwc_kyxc_nhwk_mt_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
......
 #include <stdlib.h>
 #include <utility>
-#include "config.hpp"
-#include "convolution_forward_specialization_cpu.hpp"
-#include "device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
-#include "element_wise_operation_cpu.hpp"
-#include "device_operation_instance.hpp"
+#include <memory>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
+#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_kyxck8_nhwk.hpp"
+#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
 namespace ck {
 namespace tensor_operation {
@@ -58,7 +59,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
 void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -79,7 +80,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk(
 void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -100,7 +101,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c(
 void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -137,7 +138,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt(
 void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -158,7 +159,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_relu(
 void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -179,7 +180,7 @@ void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_local_c_relu(
 void add_device_conv2d_fwd_avx2_nhwc_kyxck8_nhwk_mt_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
......
 #include <stdlib.h>
 #include <utility>
-#include "config.hpp"
-#include "convolution_forward_specialization_cpu.hpp"
-#include "device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp"
-#include "element_wise_operation_cpu.hpp"
-#include "device_operation_instance.hpp"
+#include <memory>
+#include "ck/ck.hpp"
+#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
+#include "ck/tensor_operation/cpu/device/device_convnd_fwd_avx2_nhwc_yxck_nhwk.hpp"
+#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
+#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
 namespace ck {
 namespace tensor_operation {
@@ -56,7 +57,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
 void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk(std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -77,7 +78,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk(std::vector<DeviceConvFwdPtr<PT,
 void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -98,7 +99,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c(
 void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt(
     std::vector<DeviceConvFwdPtr<PT, PT, PT>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -136,7 +137,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt(
 void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -157,7 +158,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_relu(
 void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
@@ -178,7 +179,7 @@ void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_local_c_relu(
 void add_device_conv2d_fwd_avx2_nhwc_yxck_nhwk_mt_relu(
     std::vector<DeviceConvFwdPtr<PT, PT, Relu>>& instances)
 {
-    ck::tensor_operation::device::add_device_operation_instances(
+    ck::tensor_operation::device::instance::add_device_operation_instances(
         instances,
         std::make_tuple(
             // clang-format off
......