Commit 4b448373 authored by carlushuang
Browse files

fix bug on merge latest develop

parent b79df771
#include <stdlib.h>
#include <utility>
#include "convolution_forward_specialization_cpu.hpp"
#include "config.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "device_operation_instance.hpp"
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
......@@ -60,7 +61,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -81,7 +82,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk(
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -102,7 +103,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_local_c(
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -141,7 +142,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxc_nhwk_mt(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -162,7 +163,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -183,7 +184,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_local_c(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -222,7 +223,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxc_nhwk_mt(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -243,7 +244,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -264,7 +265,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_local_c(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxc_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......
#include <stdlib.h>
#include <utility>
#include "config.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "device_operation_instance.hpp"
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_kyxck8_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
......@@ -60,7 +61,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -81,7 +82,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk(
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -102,7 +103,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_local_c(
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -141,7 +142,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_kyxck8_nhwk_mt(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -162,7 +163,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -183,7 +184,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_local_c(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -222,7 +223,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_kyxck8_nhwk_mt(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -243,7 +244,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -264,7 +265,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_local_c(
void add_device_conv2d_fwd_bias_avx2_nhwc_kyxck8_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......
#include <stdlib.h>
#include <utility>
#include "config.hpp"
#include "convolution_forward_specialization_cpu.hpp"
#include "device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp"
#include "element_wise_operation_cpu.hpp"
#include "device_operation_instance.hpp"
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/cpu/device/convolution_forward_specialization_cpu.hpp"
#include "ck/tensor_operation/cpu/device/device_convnd_fwd_bias_activation_add_avx2_nhwc_yxck_nhwk.hpp"
#include "ck/tensor_operation/cpu/element/element_wise_operation_cpu.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
......@@ -59,7 +60,7 @@ static constexpr auto LoopOver_MKN = ck::tensor_operation::cpu::device::LoopOver
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -80,7 +81,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk(
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -101,7 +102,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_local_c(
void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddReluAdd>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -140,7 +141,7 @@ void add_device_conv2d_fwd_bias_relu_add_avx2_nhwc_yxck_nhwk_mt(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -161,7 +162,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -182,7 +183,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_local_c(
void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, AddRelu>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -221,7 +222,7 @@ void add_device_conv2d_fwd_bias_relu_avx2_nhwc_yxck_nhwk_mt(
void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -242,7 +243,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk(
void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......@@ -263,7 +264,7 @@ void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_local_c(
void add_device_conv2d_fwd_bias_avx2_nhwc_yxck_nhwk_mt(
std::vector<DeviceConvFwdBiasActivationAddPtr<PT, PT, Add>>& instances)
{
ck::tensor_operation::device::add_device_operation_instances(
ck::tensor_operation::device::instance::add_device_operation_instances(
instances,
std::make_tuple(
// clang-format off
......
......@@ -141,31 +141,8 @@ int main(int argc, char* argv[])
}
else
{
<<<<<<< HEAD
// clang-format off
printf("arg1: tensor operation (gemm: GEMM\n"
" gemm_bias_2d: GEMM+Bias(2D)\n"
" gemm_bias_relu: GEMM+Bias+ReLU\n"
" gemm_bias_relu_add: GEMM+Bias+ReLU+Add\n"
" gemm_reduce: GEMM+Reduce\n"
" grouped_gemm: Grouped GEMM\n"
" conv_fwd: ForwardConvolution\n"
" conv_fwd_bias_relu: ForwardConvolution+Bias+ReLU\n"
" conv_fwd_bias_relu_add: ForwardConvolution+Bias+ReLU+Add\n"
" conv_fwd_bias_relu_atomic_add: ForwardConvolution+Bias+ReLU+AtomicAdd\n"
" conv_fwd_cpu: ForwardConvolution+Bias+ReLU+AtomicAdd\n"
" conv1d_bwd_data: BackwardConvolution data 1 dim\n"
" conv2d_bwd_data: BackwardConvolution data 2 dim\n"
" conv3d_bwd_data: BackwardConvolution data 3 dim\n"
" reduce: Reduce\n"
" conv2d_bwd_weight: Backward Weight Convolution 2d\n");
// clang-format on
}
return 0;
=======
print_helper_message();
return 0;
}
>>>>>>> origin/develop
}
......@@ -13,7 +13,9 @@ function(add_test_executable TEST_NAME)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> )
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
if(NOT CK_NOGPU)
rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
endif()
endfunction(add_test_executable TEST_NAME)
include(GoogleTest)
......@@ -27,7 +29,9 @@ function(add_gtest_executable TEST_NAME)
target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
target_link_libraries(${TEST_NAME} PRIVATE gtest_main)
gtest_discover_tests(${TEST_NAME})
if(NOT CK_NOGPU)
rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
endif()
endfunction(add_gtest_executable TEST_NAME)
......@@ -47,9 +51,5 @@ add_subdirectory(conv2d_bwd_weight)
add_subdirectory(convnd_bwd_weight)
add_subdirectory(convnd_bwd_data)
add_subdirectory(block_to_ctile_map)
<<<<<<< HEAD
add_subdirectory(cpu_ukernel)
# DONOT add client_app, that is tested via CI independently
=======
add_subdirectory(softmax)
>>>>>>> origin/develop
add_subdirectory(cpu_ukernel)
......@@ -6,14 +6,16 @@
#include <sstream>
#include <tuple>
#include <memory>
#include <half.hpp>
#include <omp.h>
#include "host_tensor.hpp"
#include "device.hpp"
#include "config.hpp"
#include "print.hpp"
#include "cpuid.hpp"
#include "threadwise_gemm_avx2.hpp"
#include <string.h>
#include <chrono>
#include "ck/ck.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/device_utility/kernel_launch.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/utility/print.hpp"
#include "ck/utility/cpuid.hpp"
#include "ck/tensor_operation/cpu/thread/threadwise_gemm_avx2.hpp"
#define ITERATE_THREAD_GEMM_AVX2_MXN_6X16_INSTANCE(FA, FB, FC, TA, TB, NT) \
ck::cpu::ThreadwiseGemmAvx2_MxN_6x16<FA, FB, FC, 6, 16, TA, TB, NT>, \
......@@ -294,16 +296,16 @@ void test_ukernel(ukenrel_t uk,
invoke_uk(param, private_c);
}
WallTimer timer;
timer.Start();
auto mStart = std::chrono::high_resolution_clock::now();
for(int i = 0; i < repeat; i++)
{
invoke_uk(param, private_c);
}
timer.End();
auto mStop = std::chrono::high_resolution_clock::now();
us += timer.GetElapsedTime() * 1e3 / repeat;
us += static_cast<float>(
std::chrono::duration_cast<std::chrono::microseconds>(mStop - mStart).count()) / repeat;
memset(private_c, 0, m * n * sizeof(float));
invoke_uk(param, private_c);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment