Commit 8f62b6a5 authored by Bartlomiej Kocot
Browse files

Several fixes of image to column

parent 887967c8
...@@ -107,11 +107,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv ...@@ -107,11 +107,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
} }
float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel}); float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
std::size_t num_btype =
std::size_t num_btype = NDoHoWo * CZYX * sizeof(InDataType); NDoHoWo * CZYX * sizeof(OutDataType) + conv_params.GetInputByte<InputDataType>();
float gb_per_sec = num_btype / 1.E6 / ave_time; float gb_per_sec = num_btype / 1.E6 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << gb_per_sec << " GB/s" << std::endl; std::cout << "Perf: " << ave_time << " ms, " << gb_per_sec << " GB/s" << std::endl;
if(config.do_verification) if(config.do_verification)
...@@ -159,7 +157,7 @@ int RunImageToColumnExample(int argc, char* argv[]) ...@@ -159,7 +157,7 @@ int RunImageToColumnExample(int argc, char* argv[])
if(conv_params.num_dim_spatial_ != NDimSpatial) if(conv_params.num_dim_spatial_ != NDimSpatial)
{ {
std::cerr << "unsupported # of spatials dimensions" << std::endl; std::cerr << "unsupported # of spatial dimensions" << std::endl;
return EXIT_FAILURE; return EXIT_FAILURE;
} }
......
...@@ -11,18 +11,45 @@ namespace ck { ...@@ -11,18 +11,45 @@ namespace ck {
namespace tensor_operation { namespace tensor_operation {
namespace device { namespace device {
// Image to column: /**
// input : input image [N, Di, Hi, Wi, C], * \brief Image to column.
// output : output image [N * Do * Ho * Wo, Z * Y * X * C] *
* This Device operator converts image ([G, N, Di, Hi, Wi, C]) to the gemm
* problem([N * Do * Ho * Wo, Z * Y * X * C]). G must be equal to 1.
*
* \tparam NDimSpatial Number of spatial dimensions.
* \tparam InputLayout Input Layout.
* \tparam InputDataType Input Data Type.
* \tparam OutputDataType Output Data Type.
*/
template <index_t NDimSpatial, template <index_t NDimSpatial,
typename InputLayout, typename InputLayout,
typename InputDataType, typename InputDataType,
typename OutputDataType> typename OutputDataType>
struct DeviceImageToColumn : public BaseOperator struct DeviceImageToColumn : public BaseOperator
{ {
/**
* \brief Make argument pointer for image to column.
*
* \param p_in A pointer to the device memory of the input image.
* \param p_out A pointer to the device memory of the output.
* \param N Convolution batch size.
* \param C Convolution number of channels.
* \param input_spatial_lengths Input spatial lengths.
* \param filter_spatial_lengths Filter spatial lengths.
* \param output_spatial_lengths Output spatial lengths.
* \param input_g_n_c_wis_strides Input strides in order [G, N, C, D, H, W].
* \param output_m_k_strides Output strides.
* \param conv_filter_strides Convolution filter strides.
* \param conv_filter_dilations Convolution filter dilations.
* \param input_left_pads Convolution left pads.
* \param input_right_pads Convolution right pads.
* \return Pointer to the argument.
*/
virtual std::unique_ptr<BaseArgument> virtual std::unique_ptr<BaseArgument>
MakeArgumentPointer(const void* p_in, // input image MakeArgumentPointer(const void* p_in,
void* p_out, // output image void* p_out,
const ck::index_t N, const ck::index_t N,
const ck::index_t C, const ck::index_t C,
const std::array<index_t, NDimSpatial>& input_spatial_lengths, const std::array<index_t, NDimSpatial>& input_spatial_lengths,
......
...@@ -7,9 +7,7 @@ ...@@ -7,9 +7,7 @@
#include "ck/tensor_description/tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp" #include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_image_to_column.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_image_to_column.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/stream_utility.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp" #include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp" #include "ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp"
#include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp" #include "ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp"
...@@ -284,24 +282,28 @@ struct DeviceImageToColumnImpl ...@@ -284,24 +282,28 @@ struct DeviceImageToColumnImpl
return false; return false;
} }
const auto x_pad_left = arg.input_left_pads_[NDimSpatial - I1]; const auto w_pad_left = arg.input_left_pads_[NDimSpatial - I1];
const auto x_pad_right = arg.input_right_pads_[NDimSpatial - I1]; const auto w_pad_right = arg.input_right_pads_[NDimSpatial - I1];
const auto dilation_x = arg.conv_filter_dilations_[NDimSpatial - I1]; const auto dilation_x = arg.conv_filter_dilations_[NDimSpatial - I1];
const auto stride_x = arg.conv_filter_strides_[NDimSpatial - I1]; const auto stride_x = arg.conv_filter_strides_[NDimSpatial - I1];
bool is_c_packed = arg.input_g_n_c_wis_strides_[NDimSpatial + I2] == arg.C_; bool is_x_packed = arg.input_g_n_c_wis_strides_[NDimSpatial + I2] == arg.C_;
bool is_c_packed = arg.input_g_n_c_wis_strides_[I2] == 1;
// check vector access with c not packed
if(!is_c_packed && ScalarPerVector != 1)
return false;
// check vector access of filter window row (only C if C is not packed) // check vector access of filter window row (only C if C is not packed)
if(!is_c_packed && arg.C_ % ScalarPerVector != 0) if(!is_x_packed && arg.C_ % ScalarPerVector != 0)
return false; return false;
// check vector access of filter window row (X * C) // check vector access of filter window row (X * C)
if(arg.X_ * arg.C_ % ScalarPerVector != 0) if(arg.X_ * arg.C_ % ScalarPerVector != 0)
return false; return false;
// check vector access of pads (x_pad_left/x_pad_right * C) // check vector access of pads (w_pad_left/w_pad_right * C)
if(x_pad_left * arg.C_ % ScalarPerVector != 0 || if(w_pad_left * arg.C_ % ScalarPerVector != 0 ||
x_pad_right * arg.C_ % ScalarPerVector != 0) w_pad_right * arg.C_ % ScalarPerVector != 0)
return false; return false;
// check vector access with stride and pad // check vector access with stride and pad
if((x_pad_left != 0 || x_pad_right != 0) && stride_x > 1 && arg.C_ % ScalarPerVector != 0) if((w_pad_left != 0 || w_pad_right != 0) && stride_x > 1 && arg.C_ % ScalarPerVector != 0)
return false; return false;
// check vector access with dilation // check vector access with dilation
if(dilation_x > 1 && arg.C_ % ScalarPerVector != 0) if(dilation_x > 1 && arg.C_ % ScalarPerVector != 0)
......
...@@ -10,7 +10,6 @@ ...@@ -10,7 +10,6 @@
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp" #include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp"
#include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp" #include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp" #include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp" #include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
......
...@@ -15,19 +15,17 @@ namespace ck { ...@@ -15,19 +15,17 @@ namespace ck {
namespace tensor_operation { namespace tensor_operation {
namespace host { namespace host {
// /**
// @brief Reference implementation for image to column. * \brief Reference implementation for image to column.
// *
// @paragraph * Tensor descriptor has [G, N, C, Di, Hi, Wi] data layout.
// Tensor descriptor in NCHW dimensional order * G must be equal to 1. Memory layout is [G, N, Di, Hi, Wi, C].
// *
// @tparam InDataType Input tensor data type. * \tparam NDimSpatial Number of spatial dimensions.
// @tparam OutDataType Output tensor data type. * \tparam InputLayout Input Layout.
// @tparam NDimSpatial Number of spatial dimensions. * \tparam InDataType Input Data Type.
// * \tparam OutDataType Output Data Type.
// input descriptor in [N, C, Di, Hi, Wi] order */
// output descriptor in [N * Do * Ho * Wo, C * Z * Y * X] order
// phyiscal layout is [N, Di, Hi, Wi, C]
template <ck::index_t NDimSpatial, template <ck::index_t NDimSpatial,
typename InputLayout, typename InputLayout,
typename InDataType, typename InDataType,
...@@ -242,12 +240,12 @@ struct ReferenceImageToColumn : public device::BaseOperator ...@@ -242,12 +240,12 @@ struct ReferenceImageToColumn : public device::BaseOperator
{ {
using namespace tensor_layout::convolution; using namespace tensor_layout::convolution;
if(!(std::is_same_v<InputLayout, GNWC> || std::is_same_v<InputLayout, GNHWC> || if constexpr(!(std::is_same_v<InputLayout, GNWC> || std::is_same_v<InputLayout, GNHWC> ||
std::is_same_v<InputLayout, GNDHWC>)) std::is_same_v<InputLayout, GNDHWC>))
{ {
return false; return false;
} }
if(!(NDimSpatial >= 1 && NDimSpatial <= 3)) if constexpr(!(NDimSpatial >= 1 && NDimSpatial <= 3))
{ {
return false; return false;
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment