Unverified Commit 4173b984 authored by Rostyslav Geyyer's avatar Rostyslav Geyyer Committed by GitHub
Browse files

Merge branch 'develop' into lwpck-756

parents 6de7d10d 85e2e1e2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <iomanip>
#include <iostream>
#include <typeinfo>
#include <limits>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/image_to_column.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp"
namespace ck {
namespace profiler {
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
template <index_t NDimSpatial,
typename InputLayout,
typename InputDataType,
typename OutputDataType>
bool profile_image_to_column_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
{
const ck::index_t NDoHoWo =
conv_param.N_ *
ck::accumulate_n<ck::index_t>(
conv_param.output_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
const ck::index_t CZYX =
conv_param.C_ *
ck::accumulate_n<ck::index_t>(
conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
const auto in_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InputLayout>(
conv_param);
const auto out_desc = HostTensorDescriptor({NDoHoWo, CZYX});
std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
std::array<ck::index_t, NDimSpatial + 3> input_g_n_c_wis_strides{};
std::array<ck::index_t, 2> output_m_k_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](const auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
copy(conv_param.input_spatial_lengths_, input_spatial_lengths);
copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths);
copy(conv_param.output_spatial_lengths_, output_spatial_lengths);
copy(in_desc.GetStrides(), input_g_n_c_wis_strides);
copy(out_desc.GetStrides(), output_m_k_strides);
copy(conv_param.conv_filter_strides_, conv_filter_strides);
copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
copy(conv_param.input_left_pads_, input_left_pads);
copy(conv_param.input_right_pads_, input_right_pads);
Tensor<InputDataType> input(in_desc);
Tensor<OutputDataType> host_output(out_desc);
Tensor<OutputDataType> device_output(out_desc);
std::cout << "input: " << input.mDesc << std::endl;
std::cout << "output: " << host_output.mDesc << std::endl;
switch(init_method)
{
case 0: break;
case 1: input.GenerateTensorValue(GeneratorTensor_2<InputDataType>{-5, 5}); break;
default: input.GenerateTensorValue(GeneratorTensor_3<InputDataType>{0.0, 1.0});
}
DeviceMem in_device_buf(sizeof(InputDataType) * input.mDesc.GetElementSpaceSize());
DeviceMem out_device_buf(sizeof(OutputDataType) * device_output.mDesc.GetElementSpaceSize());
in_device_buf.ToDevice(input.mData.data());
// run reference op
if(do_verification)
{
auto ref_image_to_column = ck::tensor_operation::host::
ReferenceImageToColumn<NDimSpatial, InputLayout, InputDataType, OutputDataType>{};
auto ref_invoker = ref_image_to_column.MakeInvoker();
auto ref_argument = ref_image_to_column.MakeArgument(input,
host_output,
conv_param.filter_spatial_lengths_,
conv_param.conv_filter_strides_,
conv_param.conv_filter_dilations_,
conv_param.input_left_pads_,
conv_param.input_right_pads_);
// init host output to zero
host_output.SetZero();
ref_invoker.Run(ref_argument);
}
using DeviceOp = ck::tensor_operation::device::
DeviceImageToColumn<NDimSpatial, InputLayout, InputDataType, OutputDataType>;
// get device op instances
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
std::string best_op_name;
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
// profile device op instances
bool pass = true;
bool is_supporting_instance = false;
for(auto& op_ptr : op_ptrs)
{
auto argument_ptr = op_ptr->MakeArgumentPointer(
static_cast<InputDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<OutputDataType*>(out_device_buf.GetDeviceBuffer()),
conv_param.N_,
conv_param.C_,
input_spatial_lengths,
filter_spatial_lengths,
output_spatial_lengths,
input_g_n_c_wis_strides,
output_m_k_strides,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads);
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
is_supporting_instance = true;
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
std::string op_name = op_ptr->GetTypeString();
auto invoker_ptr = op_ptr->MakeInvokerPointer();
float avg_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_btype =
NDoHoWo * CZYX * (sizeof(OutputDataType) + sizeof(InputDataType));
float gb_per_sec = num_btype / 1.E6 / avg_time;
std::cout << "Perf: " << std::setw(10) << avg_time << " ms, " << gb_per_sec << " GB/s, "
<< op_name << std::endl;
if(avg_time < best_avg_time)
{
best_op_name = op_name;
best_avg_time = avg_time;
best_gb_per_sec = gb_per_sec;
}
if(do_verification)
{
out_device_buf.FromDevice(device_output.mData.data());
pass = pass & ck::utils::check_err(device_output, host_output);
if(do_log)
{
LogRangeAsType<float>(std::cout << "input : ", input.mData, ",") << std::endl;
LogRangeAsType<float>(std::cout << "host_output : ", host_output.mData, ",")
<< std::endl;
LogRangeAsType<float>(std::cout << "device_output: ", device_output.mData, ",")
<< std::endl;
}
}
}
else
{
std::cout << op_ptr->GetTypeString() << " does not support this problem" << std::endl;
}
}
std::cout << "Best configuration parameters:"
<< "\nname: " << best_op_name << "\navg_time: " << best_avg_time
<< "\nGB/s: " << best_gb_per_sec << std::endl;
return is_supporting_instance && pass;
}
} // namespace profiler
} // namespace ck
......@@ -28,6 +28,7 @@ set(PROFILER_SOURCES
profile_contraction_bilinear.cpp
profile_contraction_scale.cpp
profile_grouped_conv_bwd_data.cpp
profile_image_to_column.cpp
)
if(DL_KERNELS)
list(APPEND PROFILER_SOURCES profile_batched_gemm_multi_d.cpp)
......@@ -82,6 +83,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_avg_pool3d_bwd_insta
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_max_pool_bwd_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_data_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_data_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_image_to_column_instance)
if(DL_KERNELS)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_multi_d_instance)
endif()
......
......@@ -71,6 +71,9 @@ int profile_gemm_bilinear(int argc, char* argv[])
using F16 = ck::half_t;
using F32 = float;
using I8 = std::int8_t;
using I32 = std::int32_t;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
......@@ -141,6 +144,22 @@ int profile_gemm_bilinear(int argc, char* argv[])
{
return profile(F16{}, F16{}, F32{}, F16{}, F16{}, Col{}, Col{}, Row{}, Row{});
}
else if(data_type == MatrixDataType::INT8_INT8_INT8_INT8 && layout == MatrixLayout::MK_KN_MN_MN)
{
return profile(I8{}, I8{}, I32{}, I8{}, I8{}, Row{}, Row{}, Row{}, Row{});
}
else if(data_type == MatrixDataType::INT8_INT8_INT8_INT8 && layout == MatrixLayout::MK_NK_MN_MN)
{
return profile(I8{}, I8{}, I32{}, I8{}, I8{}, Row{}, Col{}, Row{}, Row{});
}
else if(data_type == MatrixDataType::INT8_INT8_INT8_INT8 && layout == MatrixLayout::KM_KN_MN_MN)
{
return profile(I8{}, I8{}, I32{}, I8{}, I8{}, Col{}, Row{}, Row{}, Row{});
}
else if(data_type == MatrixDataType::INT8_INT8_INT8_INT8 && layout == MatrixLayout::KM_NK_MN_MN)
{
return profile(I8{}, I8{}, I32{}, I8{}, I8{}, Col{}, Col{}, Row{}, Row{});
}
else
{
std::cout << "this data_type & layout is not implemented" << std::endl;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include "profiler/profile_image_to_column_impl.hpp"
#include "profiler_operation_registry.hpp"
namespace {
enum struct ConvLayout
{
NHWC, // 0
};
enum struct DataType
{
F32_F32, // 0
F16_F16, // 1
BF16_BF16, // 2
INT8_INT8, // 3
};
#define OP_NAME "image_to_column"
#define OP_DESC "Image To Column"
static void print_helper_msg()
{
std::cout
// clang-format off
<< "arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"
<< "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
<< " 1: Input fp16, Weight fp16, Output fp16\n"
<< " 2: Input bf16, Weight bf16, Output bf16\n"
<< " 3: Input int8, Weight int8, Output int8)\n"
<< "arg3: tensor layout (0: Input[N, Hi, Wi, C], Output[N * Ho * Wo, Y * X * C])\n"
<< "arg4: verification (0: no, 1: yes)\n"
<< "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
<< "arg6: print tensor value (0: no; 1: yes)\n"
<< "arg7: time kernel (0: no, 1: yes)\n"
<< ck::utils::conv::get_conv_param_parser_helper_msg() << std::endl;
// clang-format on
}
} // namespace
int profile_image_to_column(int argc, char* argv[])
{
// 8 for control, 1 for num_dim_spatial
if(argc < 9)
{
print_helper_msg();
return 1;
}
const auto data_type = static_cast<DataType>(std::stoi(argv[2]));
const auto layout = static_cast<ConvLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const bool time_kernel = std::stoi(argv[7]);
const int num_dim_spatial = std::stoi(argv[8]);
// 8 for control, 1 for num_dim_spatial, 4 for G/N/K/C, and 6 * num_dim_spatial
if(argc != 8 + 1 + 4 + 6 * num_dim_spatial)
{
print_helper_msg();
return 1;
}
const auto params = ck::utils::conv::parse_conv_param(num_dim_spatial, 9, argv);
using F32 = float;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using INT8 = int8_t;
using namespace ck::tensor_layout::convolution;
constexpr auto I1 = ck::Number<1>{};
constexpr auto I2 = ck::Number<2>{};
constexpr auto I3 = ck::Number<3>{};
auto profile = [&](auto num_dim_spatial_tmp, auto in_layout, auto in_type, auto out_type) {
constexpr ck::index_t NDimSpatial = num_dim_spatial_tmp.value;
using InLayout = decltype(in_layout);
using InDataType = decltype(in_type);
using OutDataType = decltype(out_type);
bool pass = ck::profiler::
profile_image_to_column_impl<NDimSpatial, InLayout, InDataType, OutDataType>(
do_verification, init_method, do_log, time_kernel, params);
return pass ? 0 : 1;
};
// NHWC
if(layout == ConvLayout::NHWC)
{
if(num_dim_spatial == 1)
{
if(data_type == DataType::F32_F32)
{
return profile(I1, GNWC{}, F32{}, F32{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I1, GNWC{}, F16{}, F16{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I1, GNWC{}, BF16{}, BF16{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I1, GNWC{}, INT8{}, INT8{});
}
}
else if(num_dim_spatial == 2)
{
if(data_type == DataType::F32_F32)
{
return profile(I2, GNHWC{}, F32{}, F32{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I2, GNHWC{}, F16{}, F16{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I2, GNHWC{}, BF16{}, BF16{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I2, GNHWC{}, INT8{}, INT8{});
}
}
else if(num_dim_spatial == 3)
{
if(data_type == DataType::F32_F32)
{
return profile(I3, GNDHWC{}, F32{}, F32{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I3, GNDHWC{}, F16{}, F16{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I3, GNDHWC{}, BF16{}, BF16{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I3, GNDHWC{}, INT8{}, INT8{});
}
}
}
std::cout << "this data_type & layout is not implemented" << std::endl;
return 1;
}
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_image_to_column);
......@@ -60,6 +60,7 @@ add_subdirectory(contraction)
add_subdirectory(pool)
add_subdirectory(batched_gemm_multi_d)
add_subdirectory(grouped_convnd_bwd_data)
add_subdirectory(image_to_column)
if(GPU_TARGETS MATCHES "gfx11")
add_subdirectory(wmma_op)
endif()
add_gtest_executable(test_image_to_column test_image_to_column.cpp)
target_link_libraries(test_image_to_column PRIVATE utility device_image_to_column_instance)
add_gtest_executable(test_image_to_column_interface test_image_to_column_interface.cpp)
target_link_libraries(test_image_to_column_interface PRIVATE utility)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/profile_image_to_column_impl.hpp"
template <typename Tuple>
class TestImageToColumn : public ::testing::Test
{
protected:
using InDataType = std::tuple_element_t<0, Tuple>;
using OutDataType = std::tuple_element_t<1, Tuple>;
using InLayout = std::tuple_element_t<2, Tuple>;
std::vector<ck::utils::conv::ConvParam> conv_params;
template <ck::index_t NDimSpatial>
void Run()
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
{
pass = pass && ck::profiler::profile_image_to_column_impl<NDimSpatial,
InLayout,
InDataType,
OutDataType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
}
EXPECT_TRUE(pass);
}
};
using namespace ck::tensor_layout::convolution;
using KernelTypes1d = ::testing::Types<std::tuple<float, float, GNWC>,
std::tuple<ck::bhalf_t, ck::bhalf_t, GNWC>,
std::tuple<ck::half_t, ck::half_t, GNWC>,
std::tuple<int8_t, int8_t, GNWC>>;
using KernelTypes2d = ::testing::Types<std::tuple<float, float, GNHWC>,
std::tuple<ck::bhalf_t, ck::bhalf_t, GNHWC>,
std::tuple<ck::half_t, ck::half_t, GNHWC>,
std::tuple<int8_t, int8_t, GNHWC>>;
using KernelTypes3d = ::testing::Types<std::tuple<float, float, GNDHWC>,
std::tuple<ck::bhalf_t, ck::bhalf_t, GNDHWC>,
std::tuple<ck::half_t, ck::half_t, GNDHWC>,
std::tuple<int8_t, int8_t, GNDHWC>>;
template <typename Tuple>
class TestImageToColumn1d : public TestImageToColumn<Tuple>
{
};
template <typename Tuple>
class TestImageToColumn2d : public TestImageToColumn<Tuple>
{
};
template <typename Tuple>
class TestImageToColumn3d : public TestImageToColumn<Tuple>
{
};
TYPED_TEST_SUITE(TestImageToColumn1d, KernelTypes1d);
TYPED_TEST_SUITE(TestImageToColumn2d, KernelTypes2d);
TYPED_TEST_SUITE(TestImageToColumn3d, KernelTypes3d);
TYPED_TEST(TestImageToColumn1d, Test1D)
{
this->conv_params.clear();
this->conv_params.push_back({1, 1, 4, 1, 192, {3}, {28}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 1, 64, 1, 64, {3}, {14}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {7}, {2}, {1}, {0}, {0}});
this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {3}, {1}, {1}, {0}, {0}});
// ScalarPerVector should be 1
this->conv_params.push_back({1, 1, 4, 1, 1, {3}, {28}, {1}, {1}, {1}, {1}});
// stride != 1
this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {2}, {1}, {1}, {1}});
// dilation != 1
this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {1}, {2}, {1}, {1}});
this->template Run<1>();
}
TYPED_TEST(TestImageToColumn2d, Test2D)
{
this->conv_params.clear();
this->conv_params.push_back(
{2, 1, 4, 1, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back(
{2, 1, 64, 1, 64, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
this->template Run<2>();
}
TYPED_TEST(TestImageToColumn3d, Test3D)
{
this->conv_params.clear();
this->conv_params.push_back(
{3, 1, 16, 1, 64, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->conv_params.push_back(
{3, 1, 2, 1, 64, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back(
{3, 1, 32, 1, 64, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->template Run<3>();
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <tuple>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include <gtest/gtest.h>
using DataType = float;
using InLayout = ck::tensor_layout::convolution::GNWC;
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
template <ck::index_t ScalarPerVector, bool IsCPacked>
class TestImageToColumnInterface : public ::testing::Test
{
protected:
static constexpr ck::index_t NDimSpatial = 1;
// clang-format off
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | |
< NDimSpatial, InLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
// clang-format on
ck::utils::conv::ConvParam conv_param;
bool Run()
{
const auto N = conv_param.N_;
const auto C = conv_param.C_;
const auto FakeC =
conv_param.C_ / 2; // Fake C to simulate the behavior that C is not packed
const ck::index_t NDoHoWo =
N *
ck::accumulate_n<ck::index_t>(
conv_param.output_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
const ck::index_t CZYX =
C *
ck::accumulate_n<ck::index_t>(
conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
const auto in_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(
conv_param);
const auto out_desc = HostTensorDescriptor({NDoHoWo, CZYX});
std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
std::array<ck::index_t, NDimSpatial + 3> input_g_n_c_wis_strides{};
std::array<ck::index_t, 2> output_m_k_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
std::array<ck::index_t, NDimSpatial> input_left_pads{};
std::array<ck::index_t, NDimSpatial> input_right_pads{};
auto copy = [](const auto& x, auto& y) { std::copy(x.begin(), x.end(), y.begin()); };
copy(conv_param.input_spatial_lengths_, input_spatial_lengths);
copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths);
copy(conv_param.output_spatial_lengths_, output_spatial_lengths);
copy(in_desc.GetStrides(), input_g_n_c_wis_strides);
copy(out_desc.GetStrides(), output_m_k_strides);
copy(conv_param.conv_filter_strides_, conv_filter_strides);
copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
copy(conv_param.input_left_pads_, input_left_pads);
copy(conv_param.input_right_pads_, input_right_pads);
auto img2col = DeviceImgToColInstance{};
auto argument = img2col.MakeArgument(nullptr,
nullptr,
N,
IsCPacked ? C : FakeC,
input_spatial_lengths,
filter_spatial_lengths,
output_spatial_lengths,
input_g_n_c_wis_strides,
output_m_k_strides,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads);
return img2col.IsSupportedArgument(argument);
}
};
class TestImageToColumnInterface1ScalarPerVector : public TestImageToColumnInterface<1, true>
{
};
class TestImageToColumnInterface4ScalarPerVector : public TestImageToColumnInterface<4, true>
{
};
class TestImageToColumnInterface4ScalarPerVectorFakeC : public TestImageToColumnInterface<4, false>
{
};
TEST_F(TestImageToColumnInterface1ScalarPerVector, X1ScalarPerVector)
{
// vector load C * X % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
bool is_supported = this->Run();
EXPECT_TRUE(is_supported);
// vector load C * left_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
// vector load C * right_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
// vector load C % ScalarPerVector, right_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
// vector load C % ScalarPerVector, left_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
// vector load C % ScalarPerVector, dilation
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
// C = 4
this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
}
TEST_F(TestImageToColumnInterface4ScalarPerVector, X4ScalarPerVector)
{
// vector load C * X % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
bool is_supported = this->Run();
EXPECT_FALSE(is_supported);
// vector load C * left_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
is_supported = this->Run();
EXPECT_FALSE(is_supported);
// vector load C * right_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
is_supported = this->Run();
EXPECT_FALSE(is_supported);
// vector load C % ScalarPerVector, right_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
is_supported = this->Run();
EXPECT_FALSE(is_supported);
// vector load C % ScalarPerVector, left_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
is_supported = this->Run();
EXPECT_FALSE(is_supported);
// vector load C % ScalarPerVector, dilation
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
is_supported = this->Run();
EXPECT_FALSE(is_supported);
// C = 4
this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
}
TEST_F(TestImageToColumnInterface4ScalarPerVectorFakeC, X4ScalarPerVectorFakeC)
{
// C = 3
this->conv_param = {1, 1, 1, 1, 3, {4}, {3}, {1}, {1}, {0}, {0}};
bool is_supported = this->Run();
EXPECT_FALSE(is_supported);
// C = 4
this->conv_param = {1, 1, 1, 1, 8, {4}, {3}, {1}, {1}, {0}, {0}};
is_supported = this->Run();
EXPECT_TRUE(is_supported);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment