Unverified Commit e2243a4d authored by Bartłomiej Kocot's avatar Bartłomiej Kocot Committed by GitHub
Browse files

Add column to image kernel (#930)

* Add column to image kernel

* Minor fixes for dtypes and client examples

* Disable tests for disabled dtypes

* Disable add instances functions for disabled data types

* Minor stylistic fixes

* Revert "Disable add instances functions for disabled data types"

This reverts commit 728b8695.

* Instances reduction

* Add comments in device_column_to_image_impl

* Update changelog and Copyrights

* Improve changelog
parent 11676c7e
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <vector>
#include <memory>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
// nhwc, 1d
void add_device_image_to_column_nhwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, BF16, BF16>>>& instances);
void add_device_image_to_column_nhwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, F16, F16>>>& instances);
void add_device_image_to_column_nhwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, F32, F32>>>& instances);
void add_device_image_to_column_nhwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, int8_t, int8_t>>>& instances);
// nhwc, 2d
void add_device_image_to_column_nhwc_2d_bf16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, BF16, BF16>>>& instances);
void add_device_image_to_column_nhwc_2d_f16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, F16, F16>>>& instances);
void add_device_image_to_column_nhwc_2d_f32_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, F32, F32>>>& instances);
void add_device_image_to_column_nhwc_2d_i8_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, int8_t, int8_t>>>& instances);
// nhwc, 3d
void add_device_image_to_column_nhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, BF16, BF16>>>& instances);
void add_device_image_to_column_nhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, F16, F16>>>& instances);
void add_device_image_to_column_nhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, F32, F32>>>& instances);
void add_device_image_to_column_nhwc_3d_i8_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, int8_t, int8_t>>>& instances);
// Factory specialization for DeviceImageToColumn.
// GetInstances() returns a vector filled by the add_device_image_to_column_*
// function that matches the (rank, layout, data type) combination; the vector
// is returned empty when no combination matches.
template <ck::index_t NumDimSpatial, typename InLayout, typename InDataType, typename OutDataType>
struct DeviceOperationInstanceFactory<
    ck::tensor_operation::device::
        DeviceImageToColumn<NumDimSpatial, InLayout, InDataType, OutDataType>>
{
    using DeviceOp = DeviceImageToColumn<NumDimSpatial, InLayout, InDataType, OutDataType>;

    static auto GetInstances()
    {
        std::vector<std::unique_ptr<DeviceOp>> op_ptrs;

        // Select on the element types first, then on (spatial rank, layout).
        if constexpr(is_same_v<InDataType, float> && is_same_v<OutDataType, float>)
        {
            if constexpr(NumDimSpatial == 1 && is_same_v<InLayout, GNWC>)
            {
                add_device_image_to_column_nhwc_1d_f32_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 2 && is_same_v<InLayout, GNHWC>)
            {
                add_device_image_to_column_nhwc_2d_f32_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 3 && is_same_v<InLayout, GNDHWC>)
            {
                add_device_image_to_column_nhwc_3d_f32_instances(op_ptrs);
            }
        }
        else if constexpr(is_same_v<InDataType, half_t> && is_same_v<OutDataType, half_t>)
        {
            if constexpr(NumDimSpatial == 1 && is_same_v<InLayout, GNWC>)
            {
                add_device_image_to_column_nhwc_1d_f16_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 2 && is_same_v<InLayout, GNHWC>)
            {
                add_device_image_to_column_nhwc_2d_f16_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 3 && is_same_v<InLayout, GNDHWC>)
            {
                add_device_image_to_column_nhwc_3d_f16_instances(op_ptrs);
            }
        }
        else if constexpr(is_same_v<InDataType, ck::bhalf_t> &&
                          is_same_v<OutDataType, ck::bhalf_t>)
        {
            if constexpr(NumDimSpatial == 1 && is_same_v<InLayout, GNWC>)
            {
                add_device_image_to_column_nhwc_1d_bf16_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 2 && is_same_v<InLayout, GNHWC>)
            {
                add_device_image_to_column_nhwc_2d_bf16_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 3 && is_same_v<InLayout, GNDHWC>)
            {
                add_device_image_to_column_nhwc_3d_bf16_instances(op_ptrs);
            }
        }
        else if constexpr(is_same_v<InDataType, int8_t> && is_same_v<OutDataType, int8_t>)
        {
            if constexpr(NumDimSpatial == 1 && is_same_v<InLayout, GNWC>)
            {
                add_device_image_to_column_nhwc_1d_i8_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 2 && is_same_v<InLayout, GNHWC>)
            {
                add_device_image_to_column_nhwc_2d_i8_instances(op_ptrs);
            }
            else if constexpr(NumDimSpatial == 3 && is_same_v<InLayout, GNDHWC>)
            {
                add_device_image_to_column_nhwc_3d_i8_instances(op_ptrs);
            }
        }

        return op_ptrs;
    }
};
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
# Instance library for the column-to-image operation, built from one source
# file per spatial dimensionality (1D, 2D, 3D).
add_instance_library(device_column_to_image_instance
device_column_to_image_nhwc_1d_instance.cpp
device_column_to_image_nhwc_2d_instance.cpp
device_column_to_image_nhwc_3d_instance.cpp
)
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using namespace ck::conv_tensor_rearrange_op;
// Passes the 1D (GNWC) bf16 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_BF16 the function only references its argument.
void add_device_column_to_image_nwc_1d_bf16_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_BF16)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using Bf16Instances = device_column_to_image_bf16_instances<1, GNWC>;
    add_device_operation_instances(instances, Bf16Instances{});
#endif
}
// Passes the 1D (GNWC) fp16 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_FP16 the function only references its argument.
void add_device_column_to_image_nwc_1d_f16_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_FP16)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using F16Instances = device_column_to_image_f16_instances<1, GNWC>;
    add_device_operation_instances(instances, F16Instances{});
#endif
}
// Passes the 1D (GNWC) fp32 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_FP32 the function only references its argument.
void add_device_column_to_image_nwc_1d_f32_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_FP32)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using F32Instances = device_column_to_image_f32_instances<1, GNWC>;
    add_device_operation_instances(instances, F32Instances{});
#endif
}
// Passes the 1D (GNWC) int8 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_INT8 the function only references its argument.
void add_device_column_to_image_nwc_1d_i8_instances(
    std::vector<
        std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_INT8)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using I8Instances = device_column_to_image_i8_instances<1, GNWC>;
    add_device_operation_instances(instances, I8Instances{});
#endif
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using namespace ck::conv_tensor_rearrange_op;
// Passes the 2D (GNHWC) bf16 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_BF16 the function only references its argument.
void add_device_column_to_image_nhwc_2d_bf16_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, BF16, BF16, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_BF16)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using Bf16Instances = device_column_to_image_bf16_instances<2, GNHWC>;
    add_device_operation_instances(instances, Bf16Instances{});
#endif
}
// Passes the 2D (GNHWC) fp16 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_FP16 the function only references its argument.
void add_device_column_to_image_nhwc_2d_f16_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F16, F16, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_FP16)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using F16Instances = device_column_to_image_f16_instances<2, GNHWC>;
    add_device_operation_instances(instances, F16Instances{});
#endif
}
// Passes the 2D (GNHWC) fp32 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_FP32 the function only references its argument.
void add_device_column_to_image_nhwc_2d_f32_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F32, F32, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_FP32)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using F32Instances = device_column_to_image_f32_instances<2, GNHWC>;
    add_device_operation_instances(instances, F32Instances{});
#endif
}
// Passes the 2D (GNHWC) int8 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_INT8 the function only references its argument.
void add_device_column_to_image_nhwc_2d_i8_instances(
    std::vector<
        std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_INT8)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using I8Instances = device_column_to_image_i8_instances<2, GNHWC>;
    add_device_operation_instances(instances, I8Instances{});
#endif
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using namespace ck::conv_tensor_rearrange_op;
// Passes the 3D (GNDHWC) bf16 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_BF16 the function only references its argument.
void add_device_column_to_image_ndhwc_3d_bf16_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_BF16)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using Bf16Instances = device_column_to_image_bf16_instances<3, GNDHWC>;
    add_device_operation_instances(instances, Bf16Instances{});
#endif
}
// Passes the 3D (GNDHWC) fp16 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_FP16 the function only references its argument.
void add_device_column_to_image_ndhwc_3d_f16_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_FP16)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using F16Instances = device_column_to_image_f16_instances<3, GNDHWC>;
    add_device_operation_instances(instances, F16Instances{});
#endif
}
// Passes the 3D (GNDHWC) fp32 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_FP32 the function only references its argument.
void add_device_column_to_image_ndhwc_3d_f32_instances(
    std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_FP32)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using F32Instances = device_column_to_image_f32_instances<3, GNDHWC>;
    add_device_operation_instances(instances, F32Instances{});
#endif
}
// Passes the 3D (GNDHWC) int8 column-to-image instance list to
// add_device_operation_instances together with `instances`.
// Without CK_ENABLE_INT8 the function only references its argument.
void add_device_column_to_image_ndhwc_3d_i8_instances(
    std::vector<
        std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ColumnToImage>>>&
        instances)
{
#if !defined(CK_ENABLE_INT8)
    // presumably keeps the parameter from being flagged as unused
    ignore = instances;
#else
    using I8Instances = device_column_to_image_i8_instances<3, GNDHWC>;
    add_device_operation_instances(instances, I8Instances{});
#endif
}
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/image_to_column/device_image_to_column_instance.hpp" #include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck { namespace ck {
...@@ -9,28 +9,50 @@ namespace tensor_operation { ...@@ -9,28 +9,50 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
void add_device_image_to_column_nhwc_1d_bf16_instances( using namespace ck::conv_tensor_rearrange_op;
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, BF16, BF16>>>& instances)
void add_device_image_to_column_nwc_1d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, BF16, BF16, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_BF16
add_device_operation_instances(instances, device_image_to_column_bf16_instances<1, GNWC>{}); add_device_operation_instances(instances, device_image_to_column_bf16_instances<1, GNWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_1d_f16_instances( void add_device_image_to_column_nwc_1d_f16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, F16, F16>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F16, F16, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_FP16
add_device_operation_instances(instances, device_image_to_column_f16_instances<1, GNWC>{}); add_device_operation_instances(instances, device_image_to_column_f16_instances<1, GNWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_1d_f32_instances( void add_device_image_to_column_nwc_1d_f32_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, F32, F32>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, F32, F32, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_FP32
add_device_operation_instances(instances, device_image_to_column_f32_instances<1, GNWC>{}); add_device_operation_instances(instances, device_image_to_column_f32_instances<1, GNWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_1d_i8_instances( void add_device_image_to_column_nwc_1d_i8_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<1, GNWC, int8_t, int8_t>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<1, GNWC, int8_t, int8_t, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_INT8
add_device_operation_instances(instances, device_image_to_column_i8_instances<1, GNWC>{}); add_device_operation_instances(instances, device_image_to_column_i8_instances<1, GNWC>{});
#else
ignore = instances;
#endif
} }
} // namespace instance } // namespace instance
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/image_to_column/device_image_to_column_instance.hpp" #include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck { namespace ck {
...@@ -9,28 +9,51 @@ namespace tensor_operation { ...@@ -9,28 +9,51 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
using namespace ck::conv_tensor_rearrange_op;
void add_device_image_to_column_nhwc_2d_bf16_instances( void add_device_image_to_column_nhwc_2d_bf16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, BF16, BF16>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, BF16, BF16, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_BF16
add_device_operation_instances(instances, device_image_to_column_bf16_instances<2, GNHWC>{}); add_device_operation_instances(instances, device_image_to_column_bf16_instances<2, GNHWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_2d_f16_instances( void add_device_image_to_column_nhwc_2d_f16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, F16, F16>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F16, F16, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_FP16
add_device_operation_instances(instances, device_image_to_column_f16_instances<2, GNHWC>{}); add_device_operation_instances(instances, device_image_to_column_f16_instances<2, GNHWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_2d_f32_instances( void add_device_image_to_column_nhwc_2d_f32_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, F32, F32>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, F32, F32, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_FP32
add_device_operation_instances(instances, device_image_to_column_f32_instances<2, GNHWC>{}); add_device_operation_instances(instances, device_image_to_column_f32_instances<2, GNHWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_2d_i8_instances( void add_device_image_to_column_nhwc_2d_i8_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<2, GNHWC, int8_t, int8_t>>>& instances) std::vector<
std::unique_ptr<DeviceConvTensorRearrange<2, GNHWC, int8_t, int8_t, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_INT8
add_device_operation_instances(instances, device_image_to_column_i8_instances<2, GNHWC>{}); add_device_operation_instances(instances, device_image_to_column_i8_instances<2, GNHWC>{});
#else
ignore = instances;
#endif
} }
} // namespace instance } // namespace instance
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/library/tensor_operation_instance/gpu/image_to_column/device_image_to_column_instance.hpp" #include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp"
#include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp" #include "ck/library/tensor_operation_instance/add_device_operation_instance.hpp"
namespace ck { namespace ck {
...@@ -9,28 +9,51 @@ namespace tensor_operation { ...@@ -9,28 +9,51 @@ namespace tensor_operation {
namespace device { namespace device {
namespace instance { namespace instance {
void add_device_image_to_column_nhwc_3d_bf16_instances( using namespace ck::conv_tensor_rearrange_op;
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, BF16, BF16>>>& instances)
void add_device_image_to_column_ndhwc_3d_bf16_instances(
std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, BF16, BF16, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_BF16
add_device_operation_instances(instances, device_image_to_column_bf16_instances<3, GNDHWC>{}); add_device_operation_instances(instances, device_image_to_column_bf16_instances<3, GNDHWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_3d_f16_instances( void add_device_image_to_column_ndhwc_3d_f16_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, F16, F16>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F16, F16, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_FP16
add_device_operation_instances(instances, device_image_to_column_f16_instances<3, GNDHWC>{}); add_device_operation_instances(instances, device_image_to_column_f16_instances<3, GNDHWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_3d_f32_instances( void add_device_image_to_column_ndhwc_3d_f32_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, F32, F32>>>& instances) std::vector<std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, F32, F32, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_FP32
add_device_operation_instances(instances, device_image_to_column_f32_instances<3, GNDHWC>{}); add_device_operation_instances(instances, device_image_to_column_f32_instances<3, GNDHWC>{});
#else
ignore = instances;
#endif
} }
void add_device_image_to_column_nhwc_3d_i8_instances( void add_device_image_to_column_ndhwc_3d_i8_instances(
std::vector<std::unique_ptr<DeviceImageToColumn<3, GNDHWC, int8_t, int8_t>>>& instances) std::vector<
std::unique_ptr<DeviceConvTensorRearrange<3, GNDHWC, int8_t, int8_t, ImageToColumn>>>&
instances)
{ {
#ifdef CK_ENABLE_INT8
add_device_operation_instances(instances, device_image_to_column_i8_instances<3, GNDHWC>{}); add_device_operation_instances(instances, device_image_to_column_i8_instances<3, GNDHWC>{});
#else
ignore = instances;
#endif
} }
} // namespace instance } // namespace instance
......
...@@ -185,7 +185,7 @@ GB/s: 69.2301 ...@@ -185,7 +185,7 @@ GB/s: 69.2301
``` ```
Note: This kernel uses atomic add, which causes the output buffer to be accumulated multiple times and verification to fail. To work around this, do not enable CK's own timer and verification at the same time. Note: This kernel uses atomic add, which causes the output buffer to be accumulated multiple times and verification to fail. To work around this, do not enable CK's own timer and verification at the same time.
## Profile image to column kernels ## Profile image to column/column to image kernels
```bash ```bash
# arg1: tensor operation (" OP_NAME ": " OP_DESC ") # arg1: tensor operation (" OP_NAME ": " OP_DESC ")
# arg2: data type (0: Input fp32, Weight fp32, Output fp32 # arg2: data type (0: Input fp32, Weight fp32, Output fp32
...@@ -197,6 +197,7 @@ Note: This kernel use atomic add, this will cause output buffer to be accumulate ...@@ -197,6 +197,7 @@ Note: This kernel use atomic add, this will cause output buffer to be accumulate
# arg5: initialization (0: no init, 1: integer value, 2: decimal value) # arg5: initialization (0: no init, 1: integer value, 2: decimal value)
# arg6: print tensor value (0: no; 1: yes) # arg6: print tensor value (0: no; 1: yes)
# arg7: time kernel (0: no, 1: yes) # arg7: time kernel (0: no, 1: yes)
# arg8: operation type (0: ImageToColumn, 1: ColumnToImage)
# Following arguments (depending on number of spatial dims): # Following arguments (depending on number of spatial dims):
# Number of spatial dimensions (1=Conv1d, 2=Conv2d, 3=Conv3d) # Number of spatial dimensions (1=Conv1d, 2=Conv2d, 3=Conv3d)
# G, N, K, C, # G, N, K, C,
...@@ -207,8 +208,8 @@ Note: This kernel use atomic add, this will cause output buffer to be accumulate ...@@ -207,8 +208,8 @@ Note: This kernel use atomic add, this will cause output buffer to be accumulate
# <left padding>, (ie LeftPy, LeftPx for 2D) # <left padding>, (ie LeftPy, LeftPx for 2D)
# <right padding>, (ie RightPy, RightPx for 2D) # <right padding>, (ie RightPy, RightPx for 2D)
################ op datatype layout verify init log time Ndims G N K C Y X Hi Wi Sy Sx Dy Dx LeftPy LeftPx RightPy RightPx ################ op datatype layout verify init log time opType Ndims G N K C Y X Hi Wi Sy Sx Dy Dx LeftPy LeftPx RightPy RightPx
./bin/ckProfiler image_to_column 0 0 1 1 0 1 2 1 256 1 512 3 3 28 28 1 1 1 1 0 0 0 0 ./bin/ckProfiler conv_tensor_rearrange 0 0 0 1 0 1 0 2 1 256 1 512 3 3 28 28 1 1 1 1 0 0 0 0
``` ```
...@@ -222,3 +223,4 @@ name: DeviceImageToColumn<128, 32, 64, 4> ...@@ -222,3 +223,4 @@ name: DeviceImageToColumn<128, 32, 64, 4>
avg_time: 3.12326 avg_time: 3.12326
GB/s: 2042.59 GB/s: 2042.59
``` ```
Note: The column to image kernel adds to the output memory, which causes the output buffer to be accumulated multiple times and verification to fail. To work around this, do not enable CK's own timer and verification at the same time.
...@@ -9,9 +9,11 @@ ...@@ -9,9 +9,11 @@
#include <limits> #include <limits>
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp" #include "ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp"
#include "ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp" #include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/image_to_column.hpp" #include "ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
...@@ -19,22 +21,88 @@ ...@@ -19,22 +21,88 @@
#include "ck/library/utility/convolution_parameter.hpp" #include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp" #include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp"
namespace ck { namespace ck {
namespace profiler { namespace profiler {
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
using namespace conv_tensor_rearrange_op;
// Builds the host-side input tensor for the selected rearrange operation:
// ColumnToImage reads from the gemm-shaped descriptor, ImageToColumn reads
// from the image descriptor. Any other op type throws std::runtime_error.
template <typename InputDataType, typename ConvTensorRearrangeOp>
Tensor<InputDataType> create_input(const HostTensorDescriptor& image_desc,
                                   const HostTensorDescriptor& gemm_desc)
{
    if constexpr(std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
    {
        return Tensor<InputDataType>(gemm_desc);
    }
    else if constexpr(std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
    {
        return Tensor<InputDataType>(image_desc);
    }
    else
    {
        throw std::runtime_error("Unsupported op!");
    }
}
// Builds a host-side output tensor for the selected rearrange operation:
// ColumnToImage writes to the image descriptor, ImageToColumn writes to the
// gemm-shaped descriptor. Any other op type throws std::runtime_error.
template <typename OutputDataType, typename ConvTensorRearrangeOp>
Tensor<OutputDataType> create_output(const HostTensorDescriptor& image_desc,
                                     const HostTensorDescriptor& gemm_desc)
{
    if constexpr(std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
    {
        return Tensor<OutputDataType>(image_desc);
    }
    else if constexpr(std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
    {
        return Tensor<OutputDataType>(gemm_desc);
    }
    else
    {
        throw std::runtime_error("Unsupported op!");
    }
}
template <index_t NDimSpatial,
typename InputLayout,
typename InputDataType,
typename OutputDataType,
typename ConvTensorRearrangeOp>
static auto make_ref_op()
{
if constexpr(std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
{
return ck::tensor_operation::host::
ReferenceImageToColumn<NDimSpatial, InputLayout, InputDataType, OutputDataType>{};
}
else if constexpr(std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
{
return ck::tensor_operation::host::
ReferenceColumnToImage<NDimSpatial, InputLayout, InputDataType, OutputDataType>{};
}
else
{
throw std::runtime_error("Unsupported op!");
}
}
template <index_t NDimSpatial, template <index_t NDimSpatial,
typename InputLayout, typename InputLayout,
typename InputDataType, typename InputDataType,
typename OutputDataType> typename OutputDataType,
bool profile_image_to_column_impl(int do_verification, typename ConvTensorRearrangeOp>
int init_method, bool profile_conv_tensor_rearrange_impl(int do_verification,
bool do_log, int init_method,
bool time_kernel, bool do_log,
const ck::utils::conv::ConvParam& conv_param) bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
{ {
const ck::index_t NDoHoWo = const ck::index_t NDoHoWo =
conv_param.N_ * conv_param.N_ *
...@@ -45,16 +113,16 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -45,16 +113,16 @@ bool profile_image_to_column_impl(int do_verification,
ck::accumulate_n<ck::index_t>( ck::accumulate_n<ck::index_t>(
conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>()); conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
const auto in_desc = const auto image_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InputLayout>( ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InputLayout>(
conv_param); conv_param);
const auto out_desc = HostTensorDescriptor({NDoHoWo, CZYX}); const auto gemm_desc = HostTensorDescriptor({NDoHoWo, CZYX});
std::array<ck::index_t, NDimSpatial> input_spatial_lengths{}; std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{}; std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> output_spatial_lengths{}; std::array<ck::index_t, NDimSpatial> output_spatial_lengths{};
std::array<ck::index_t, NDimSpatial + 3> input_g_n_c_wis_strides{}; std::array<ck::index_t, NDimSpatial + 3> image_g_n_c_wis_strides{};
std::array<ck::index_t, 2> output_m_k_strides{}; std::array<ck::index_t, 2> gemm_m_k_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_strides{}; std::array<ck::index_t, NDimSpatial> conv_filter_strides{};
std::array<ck::index_t, NDimSpatial> conv_filter_dilations{}; std::array<ck::index_t, NDimSpatial> conv_filter_dilations{};
std::array<ck::index_t, NDimSpatial> input_left_pads{}; std::array<ck::index_t, NDimSpatial> input_left_pads{};
...@@ -65,16 +133,19 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -65,16 +133,19 @@ bool profile_image_to_column_impl(int do_verification,
copy(conv_param.input_spatial_lengths_, input_spatial_lengths); copy(conv_param.input_spatial_lengths_, input_spatial_lengths);
copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths); copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths);
copy(conv_param.output_spatial_lengths_, output_spatial_lengths); copy(conv_param.output_spatial_lengths_, output_spatial_lengths);
copy(in_desc.GetStrides(), input_g_n_c_wis_strides); copy(image_desc.GetStrides(), image_g_n_c_wis_strides);
copy(out_desc.GetStrides(), output_m_k_strides); copy(gemm_desc.GetStrides(), gemm_m_k_strides);
copy(conv_param.conv_filter_strides_, conv_filter_strides); copy(conv_param.conv_filter_strides_, conv_filter_strides);
copy(conv_param.conv_filter_dilations_, conv_filter_dilations); copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
copy(conv_param.input_left_pads_, input_left_pads); copy(conv_param.input_left_pads_, input_left_pads);
copy(conv_param.input_right_pads_, input_right_pads); copy(conv_param.input_right_pads_, input_right_pads);
Tensor<InputDataType> input(in_desc); Tensor<InputDataType> input =
Tensor<OutputDataType> host_output(out_desc); create_input<InputDataType, ConvTensorRearrangeOp>(image_desc, gemm_desc);
Tensor<OutputDataType> device_output(out_desc); Tensor<OutputDataType> device_output =
create_output<OutputDataType, ConvTensorRearrangeOp>(image_desc, gemm_desc);
Tensor<OutputDataType> host_output =
create_output<OutputDataType, ConvTensorRearrangeOp>(image_desc, gemm_desc);
std::cout << "input: " << input.mDesc << std::endl; std::cout << "input: " << input.mDesc << std::endl;
std::cout << "output: " << host_output.mDesc << std::endl; std::cout << "output: " << host_output.mDesc << std::endl;
...@@ -94,17 +165,21 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -94,17 +165,21 @@ bool profile_image_to_column_impl(int do_verification,
// run reference op // run reference op
if(do_verification) if(do_verification)
{ {
auto ref_image_to_column = ck::tensor_operation::host:: auto ref_conv_tensor_rearrange = make_ref_op<NDimSpatial,
ReferenceImageToColumn<NDimSpatial, InputLayout, InputDataType, OutputDataType>{}; InputLayout,
InputDataType,
OutputDataType,
ConvTensorRearrangeOp>();
auto ref_invoker = ref_image_to_column.MakeInvoker(); auto ref_invoker = ref_conv_tensor_rearrange.MakeInvoker();
auto ref_argument = ref_image_to_column.MakeArgument(input, auto ref_argument =
host_output, ref_conv_tensor_rearrange.MakeArgument(input,
conv_param.filter_spatial_lengths_, host_output,
conv_param.conv_filter_strides_, conv_param.filter_spatial_lengths_,
conv_param.conv_filter_dilations_, conv_param.conv_filter_strides_,
conv_param.input_left_pads_, conv_param.conv_filter_dilations_,
conv_param.input_right_pads_); conv_param.input_left_pads_,
conv_param.input_right_pads_);
// init host output to zero // init host output to zero
host_output.SetZero(); host_output.SetZero();
...@@ -112,8 +187,11 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -112,8 +187,11 @@ bool profile_image_to_column_impl(int do_verification,
ref_invoker.Run(ref_argument); ref_invoker.Run(ref_argument);
} }
using DeviceOp = ck::tensor_operation::device:: using DeviceOp = ck::tensor_operation::device::DeviceConvTensorRearrange<NDimSpatial,
DeviceImageToColumn<NDimSpatial, InputLayout, InputDataType, OutputDataType>; InputLayout,
InputDataType,
OutputDataType,
ConvTensorRearrangeOp>;
// get device op instances // get device op instances
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory< const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
...@@ -139,8 +217,8 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -139,8 +217,8 @@ bool profile_image_to_column_impl(int do_verification,
input_spatial_lengths, input_spatial_lengths,
filter_spatial_lengths, filter_spatial_lengths,
output_spatial_lengths, output_spatial_lengths,
input_g_n_c_wis_strides, image_g_n_c_wis_strides,
output_m_k_strides, gemm_m_k_strides,
conv_filter_strides, conv_filter_strides,
conv_filter_dilations, conv_filter_dilations,
input_left_pads, input_left_pads,
......
...@@ -28,7 +28,7 @@ set(PROFILER_SOURCES ...@@ -28,7 +28,7 @@ set(PROFILER_SOURCES
profile_contraction_bilinear.cpp profile_contraction_bilinear.cpp
profile_contraction_scale.cpp profile_contraction_scale.cpp
profile_grouped_conv_bwd_data.cpp profile_grouped_conv_bwd_data.cpp
profile_image_to_column.cpp profile_conv_tensor_rearrange.cpp
) )
if(DL_KERNELS) if(DL_KERNELS)
list(APPEND PROFILER_SOURCES profile_batched_gemm_multi_d.cpp) list(APPEND PROFILER_SOURCES profile_batched_gemm_multi_d.cpp)
...@@ -84,6 +84,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_max_pool_bwd_instanc ...@@ -84,6 +84,7 @@ target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_max_pool_bwd_instanc
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_data_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv2d_bwd_data_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_data_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_grouped_conv3d_bwd_data_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_image_to_column_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_image_to_column_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_column_to_image_instance)
if(DL_KERNELS) if(DL_KERNELS)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_multi_d_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_batched_gemm_multi_d_instance)
endif() endif()
......
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include "profiler/profile_image_to_column_impl.hpp" #include "profiler/profile_conv_tensor_rearrange_impl.hpp"
#include "profiler_operation_registry.hpp" #include "profiler_operation_registry.hpp"
namespace { namespace {
// Rearrangement direction selected on the command line. The numeric values
// (0, 1) are the ones advertised for "arg8: operation type" in the help text
// and are produced by casting the parsed integer argument to this enum.
enum struct RearrangeOp
{
ImageToColumn, // 0
ColumnToImage, // 1
};
enum struct ConvLayout enum struct ConvLayout
{ {
NHWC, // 0 NHWC, // 0
...@@ -24,8 +30,8 @@ enum struct DataType ...@@ -24,8 +30,8 @@ enum struct DataType
INT8_INT8, // 3 INT8_INT8, // 3
}; };
#define OP_NAME "image_to_column" #define OP_NAME "conv_tensor_rearrange"
#define OP_DESC "Image To Column" #define OP_DESC "Conv Tensor Rearrange"
static void print_helper_msg() static void print_helper_msg()
{ {
...@@ -41,16 +47,17 @@ static void print_helper_msg() ...@@ -41,16 +47,17 @@ static void print_helper_msg()
<< "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n" << "arg5: initialization (0: no init, 1: integer value, 2: decimal value)\n"
<< "arg6: print tensor value (0: no; 1: yes)\n" << "arg6: print tensor value (0: no; 1: yes)\n"
<< "arg7: time kernel (0: no, 1: yes)\n" << "arg7: time kernel (0: no, 1: yes)\n"
<< "arg8: operation type (0: ImageToColumn, 1: ColumnToImage)\n"
<< ck::utils::conv::get_conv_param_parser_helper_msg() << std::endl; << ck::utils::conv::get_conv_param_parser_helper_msg() << std::endl;
// clang-format on // clang-format on
} }
} // namespace } // namespace
int profile_image_to_column(int argc, char* argv[]) int profile_conv_tensor_rearrange(int argc, char* argv[])
{ {
// 8 for control, 1 for num_dim_spatial // 9 for control, 1 for num_dim_spatial
if(argc < 9) if(argc < 10)
{ {
print_helper_msg(); print_helper_msg();
return 1; return 1;
...@@ -62,16 +69,17 @@ int profile_image_to_column(int argc, char* argv[]) ...@@ -62,16 +69,17 @@ int profile_image_to_column(int argc, char* argv[])
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[6]);
const bool time_kernel = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[7]);
const int num_dim_spatial = std::stoi(argv[8]); const auto rearrange_op = static_cast<RearrangeOp>(std::stoi(argv[8]));
const int num_dim_spatial = std::stoi(argv[9]);
// 8 for control, 1 for num_dim_spatial, 4 for G/N/K/C, and 6 * num_dim_spatial // 9 for control, 1 for num_dim_spatial, 4 for G/N/K/C, and 6 * num_dim_spatial
if(argc != 8 + 1 + 4 + 6 * num_dim_spatial) if(argc != 9 + 1 + 4 + 6 * num_dim_spatial)
{ {
print_helper_msg(); print_helper_msg();
return 1; return 1;
} }
const auto params = ck::utils::conv::parse_conv_param(num_dim_spatial, 9, argv); const auto params = ck::utils::conv::parse_conv_param(num_dim_spatial, 10, argv);
using F32 = float; using F32 = float;
using F16 = ck::half_t; using F16 = ck::half_t;
...@@ -79,12 +87,17 @@ int profile_image_to_column(int argc, char* argv[]) ...@@ -79,12 +87,17 @@ int profile_image_to_column(int argc, char* argv[])
using INT8 = int8_t; using INT8 = int8_t;
using namespace ck::tensor_layout::convolution; using namespace ck::tensor_layout::convolution;
using namespace ck::conv_tensor_rearrange_op;
constexpr auto I1 = ck::Number<1>{}; constexpr auto I1 = ck::Number<1>{};
constexpr auto I2 = ck::Number<2>{}; constexpr auto I2 = ck::Number<2>{};
constexpr auto I3 = ck::Number<3>{}; constexpr auto I3 = ck::Number<3>{};
auto profile = [&](auto num_dim_spatial_tmp, auto in_layout, auto in_type, auto out_type) { auto profile = [&](auto num_dim_spatial_tmp,
auto in_layout,
auto in_type,
auto out_type,
auto rearrange_op_type) {
constexpr ck::index_t NDimSpatial = num_dim_spatial_tmp.value; constexpr ck::index_t NDimSpatial = num_dim_spatial_tmp.value;
using InLayout = decltype(in_layout); using InLayout = decltype(in_layout);
...@@ -92,78 +105,147 @@ int profile_image_to_column(int argc, char* argv[]) ...@@ -92,78 +105,147 @@ int profile_image_to_column(int argc, char* argv[])
using InDataType = decltype(in_type); using InDataType = decltype(in_type);
using OutDataType = decltype(out_type); using OutDataType = decltype(out_type);
using Op = decltype(rearrange_op_type);
bool pass = ck::profiler:: bool pass = ck::profiler::
profile_image_to_column_impl<NDimSpatial, InLayout, InDataType, OutDataType>( profile_conv_tensor_rearrange_impl<NDimSpatial, InLayout, InDataType, OutDataType, Op>(
do_verification, init_method, do_log, time_kernel, params); do_verification, init_method, do_log, time_kernel, params);
return pass ? 0 : 1; return pass ? 0 : 1;
}; };
// NHWC // Image To Column
if(layout == ConvLayout::NHWC) if(rearrange_op == RearrangeOp::ImageToColumn)
{ {
if(num_dim_spatial == 1) // NHWC
if(layout == ConvLayout::NHWC)
{ {
if(data_type == DataType::F32_F32) if(num_dim_spatial == 1)
{
return profile(I1, GNWC{}, F32{}, F32{});
}
else if(data_type == DataType::F16_F16)
{ {
return profile(I1, GNWC{}, F16{}, F16{}); if(data_type == DataType::F32_F32)
{
return profile(I1, GNWC{}, F32{}, F32{}, ImageToColumn{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I1, GNWC{}, F16{}, F16{}, ImageToColumn{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I1, GNWC{}, BF16{}, BF16{}, ImageToColumn{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I1, GNWC{}, INT8{}, INT8{}, ImageToColumn{});
}
} }
else if(data_type == DataType::BF16_BF16) else if(num_dim_spatial == 2)
{ {
return profile(I1, GNWC{}, BF16{}, BF16{}); if(data_type == DataType::F32_F32)
{
return profile(I2, GNHWC{}, F32{}, F32{}, ImageToColumn{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I2, GNHWC{}, F16{}, F16{}, ImageToColumn{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I2, GNHWC{}, BF16{}, BF16{}, ImageToColumn{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I2, GNHWC{}, INT8{}, INT8{}, ImageToColumn{});
}
} }
else if(data_type == DataType::INT8_INT8) else if(num_dim_spatial == 3)
{ {
return profile(I1, GNWC{}, INT8{}, INT8{}); if(data_type == DataType::F32_F32)
{
return profile(I3, GNDHWC{}, F32{}, F32{}, ImageToColumn{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I3, GNDHWC{}, F16{}, F16{}, ImageToColumn{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I3, GNDHWC{}, BF16{}, BF16{}, ImageToColumn{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I3, GNDHWC{}, INT8{}, INT8{}, ImageToColumn{});
}
} }
} }
else if(num_dim_spatial == 2) }
{ else if(rearrange_op == RearrangeOp::ColumnToImage)
if(data_type == DataType::F32_F32) {
{ // NHWC
return profile(I2, GNHWC{}, F32{}, F32{}); if(layout == ConvLayout::NHWC)
}
else if(data_type == DataType::F16_F16)
{
return profile(I2, GNHWC{}, F16{}, F16{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I2, GNHWC{}, BF16{}, BF16{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I2, GNHWC{}, INT8{}, INT8{});
}
}
else if(num_dim_spatial == 3)
{ {
if(data_type == DataType::F32_F32) if(num_dim_spatial == 1)
{ {
return profile(I3, GNDHWC{}, F32{}, F32{}); if(data_type == DataType::F32_F32)
{
return profile(I1, GNWC{}, F32{}, F32{}, ColumnToImage{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I1, GNWC{}, F16{}, F16{}, ColumnToImage{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I1, GNWC{}, BF16{}, BF16{}, ColumnToImage{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I1, GNWC{}, INT8{}, INT8{}, ColumnToImage{});
}
} }
else if(data_type == DataType::F16_F16) else if(num_dim_spatial == 2)
{ {
return profile(I3, GNDHWC{}, F16{}, F16{}); if(data_type == DataType::F32_F32)
{
return profile(I2, GNHWC{}, F32{}, F32{}, ColumnToImage{});
}
else if(data_type == DataType::F16_F16)
{
return profile(I2, GNHWC{}, F16{}, F16{}, ColumnToImage{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I2, GNHWC{}, BF16{}, BF16{}, ColumnToImage{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I2, GNHWC{}, INT8{}, INT8{}, ColumnToImage{});
}
} }
else if(data_type == DataType::BF16_BF16) else if(num_dim_spatial == 3)
{ {
return profile(I3, GNDHWC{}, BF16{}, BF16{}); if(data_type == DataType::F32_F32)
} {
else if(data_type == DataType::INT8_INT8) return profile(I3, GNDHWC{}, F32{}, F32{}, ColumnToImage{});
{ }
return profile(I3, GNDHWC{}, INT8{}, INT8{}); else if(data_type == DataType::F16_F16)
{
return profile(I3, GNDHWC{}, F16{}, F16{}, ColumnToImage{});
}
else if(data_type == DataType::BF16_BF16)
{
return profile(I3, GNDHWC{}, BF16{}, BF16{}, ColumnToImage{});
}
else if(data_type == DataType::INT8_INT8)
{
return profile(I3, GNDHWC{}, INT8{}, INT8{}, ColumnToImage{});
}
} }
} }
} }
std::cout << "this data_type & layout is not implemented" << std::endl; std::cout << "this data_type & layout is not implemented" << std::endl;
return 1; return 1;
} }
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_image_to_column); REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_conv_tensor_rearrange);
...@@ -155,7 +155,7 @@ add_subdirectory(contraction) ...@@ -155,7 +155,7 @@ add_subdirectory(contraction)
add_subdirectory(pool) add_subdirectory(pool)
add_subdirectory(batched_gemm_multi_d) add_subdirectory(batched_gemm_multi_d)
add_subdirectory(grouped_convnd_bwd_data) add_subdirectory(grouped_convnd_bwd_data)
add_subdirectory(image_to_column) add_subdirectory(conv_tensor_rearrange)
if(GPU_TARGETS MATCHES "gfx11") if(GPU_TARGETS MATCHES "gfx11")
add_subdirectory(wmma_op) add_subdirectory(wmma_op)
endif() endif()
# Functional test: linked against both the image-to-column and the
# column-to-image device instance libraries.
add_gtest_executable(test_conv_tensor_rearrange test_conv_tensor_rearrange.cpp)
target_link_libraries(test_conv_tensor_rearrange PRIVATE utility device_image_to_column_instance device_column_to_image_instance)
# Interface test: linked against the utility library only.
add_gtest_executable(test_conv_tensor_rearrange_interface test_conv_tensor_rearrange_interface.cpp)
target_link_libraries(test_conv_tensor_rearrange_interface PRIVATE utility)
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib> #include <cstdlib>
#include <iostream> #include <iostream>
...@@ -9,29 +9,29 @@ ...@@ -9,29 +9,29 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include "profiler/profile_image_to_column_impl.hpp" #include "profiler/profile_conv_tensor_rearrange_impl.hpp"
template <typename Tuple> template <typename Tuple>
class TestImageToColumn : public ::testing::Test class TestConvTensorRearrange : public ::testing::Test
{ {
protected: protected:
using InDataType = std::tuple_element_t<0, Tuple>; using ImLayout = std::tuple_element_t<0, Tuple>;
using OutDataType = std::tuple_element_t<1, Tuple>; using ConvTensorRearrangeOp = std::tuple_element_t<1, Tuple>;
using InLayout = std::tuple_element_t<2, Tuple>;
std::vector<ck::utils::conv::ConvParam> conv_params; std::vector<ck::utils::conv::ConvParam> conv_params;
template <ck::index_t NDimSpatial> template <ck::index_t NDimSpatial, typename InDataType, typename OutDataType>
void Run() void Run()
{ {
EXPECT_FALSE(conv_params.empty()); EXPECT_FALSE(conv_params.empty());
bool pass = true; bool pass = true;
for(auto& param : conv_params) for(auto& param : conv_params)
{ {
pass = pass && ck::profiler::profile_image_to_column_impl<NDimSpatial, pass = pass && ck::profiler::profile_conv_tensor_rearrange_impl<NDimSpatial,
InLayout, ImLayout,
InDataType, InDataType,
OutDataType>( OutDataType,
ConvTensorRearrangeOp>(
true, // do_verification true, // do_verification
1, // init_method: integer value 1, // init_method: integer value
false, // do_log false, // do_log
...@@ -43,48 +43,43 @@ class TestImageToColumn : public ::testing::Test ...@@ -43,48 +43,43 @@ class TestImageToColumn : public ::testing::Test
}; };
using namespace ck::tensor_layout::convolution; using namespace ck::tensor_layout::convolution;
using namespace ck::conv_tensor_rearrange_op;
// Type lists that parameterize the typed test suites, one list per spatial dimension.
// Side-by-side diff view: the old lists (left) enumerate (in type, out type, layout)
// tuples; the new lists (right) enumerate (layout, rearrange op) tuples, so each layout
// is paired with both ImageToColumn and ColumnToImage.
using KernelTypes1d = ::testing::Types<std::tuple<float, float, GNWC>, using KernelTypes1d =
std::tuple<ck::bhalf_t, ck::bhalf_t, GNWC>, ::testing::Types<std::tuple<GNWC, ImageToColumn>, std::tuple<GNWC, ColumnToImage>>;
std::tuple<ck::half_t, ck::half_t, GNWC>,
std::tuple<int8_t, int8_t, GNWC>>;
using KernelTypes2d = ::testing::Types<std::tuple<float, float, GNHWC>, using KernelTypes2d =
std::tuple<ck::bhalf_t, ck::bhalf_t, GNHWC>, ::testing::Types<std::tuple<GNHWC, ImageToColumn>, std::tuple<GNHWC, ColumnToImage>>;
std::tuple<ck::half_t, ck::half_t, GNHWC>,
std::tuple<int8_t, int8_t, GNHWC>>;
using KernelTypes3d = ::testing::Types<std::tuple<float, float, GNDHWC>, using KernelTypes3d =
std::tuple<ck::bhalf_t, ck::bhalf_t, GNDHWC>, ::testing::Types<std::tuple<GNDHWC, ImageToColumn>, std::tuple<GNDHWC, ColumnToImage>>;
std::tuple<ck::half_t, ck::half_t, GNDHWC>,
std::tuple<int8_t, int8_t, GNDHWC>>;
// Empty per-dimension fixtures. Each one only derives from the shared fixture so that
// the 1D, 2D and 3D type lists can be registered as separate typed test suites.
// Side-by-side diff view: old TestImageToColumn* names on the left, new names on the right.
template <typename Tuple> template <typename Tuple>
class TestImageToColumn1d : public TestImageToColumn<Tuple> class TestConvTensorRearrange1d : public TestConvTensorRearrange<Tuple>
{ {
}; };
template <typename Tuple> template <typename Tuple>
class TestImageToColumn2d : public TestImageToColumn<Tuple> class TestConvTensorRearrange2d : public TestConvTensorRearrange<Tuple>
{ {
}; };
template <typename Tuple> template <typename Tuple>
class TestImageToColumn3d : public TestImageToColumn<Tuple> class TestConvTensorRearrange3d : public TestConvTensorRearrange<Tuple>
{ {
}; };
// Bind each fixture to the type list of the matching dimension.
TYPED_TEST_SUITE(TestImageToColumn1d, KernelTypes1d); TYPED_TEST_SUITE(TestConvTensorRearrange1d, KernelTypes1d);
TYPED_TEST_SUITE(TestImageToColumn2d, KernelTypes2d); TYPED_TEST_SUITE(TestConvTensorRearrange2d, KernelTypes2d);
TYPED_TEST_SUITE(TestImageToColumn3d, KernelTypes3d); TYPED_TEST_SUITE(TestConvTensorRearrange3d, KernelTypes3d);
TYPED_TEST(TestImageToColumn1d, Test1D) TYPED_TEST(TestConvTensorRearrange1d, Test1D)
{ {
this->conv_params.clear(); this->conv_params.clear();
this->conv_params.push_back({1, 1, 4, 1, 192, {3}, {28}, {1}, {1}, {1}, {1}}); this->conv_params.push_back({1, 1, 4, 1, 192, {3}, {28}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 1, 64, 1, 64, {3}, {14}, {1}, {1}, {1}, {1}}); this->conv_params.push_back({1, 1, 64, 1, 64, {3}, {14}, {1}, {1}, {1}, {1}});
this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {7}, {2}, {1}, {0}, {0}}); this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {7}, {3}, {1}, {0}, {0}});
this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {3}, {1}, {1}, {0}, {0}}); this->conv_params.push_back({1, 1, 64, 1, 64, {1}, {3}, {1}, {1}, {0}, {0}});
// ScalarPerVector should be 1 // ScalarPerVector should be 1
this->conv_params.push_back({1, 1, 4, 1, 1, {3}, {28}, {1}, {1}, {1}, {1}}); this->conv_params.push_back({1, 1, 4, 1, 1, {3}, {28}, {1}, {1}, {1}, {1}});
...@@ -92,10 +87,21 @@ TYPED_TEST(TestImageToColumn1d, Test1D) ...@@ -92,10 +87,21 @@ TYPED_TEST(TestImageToColumn1d, Test1D)
this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {2}, {1}, {1}, {1}}); this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {2}, {1}, {1}, {1}});
// dilation != 1 // dilation != 1
this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {1}, {2}, {1}, {1}}); this->conv_params.push_back({1, 1, 1, 1, 4, {3}, {28}, {1}, {2}, {1}, {1}});
this->template Run<1>(); #ifdef CK_ENABLE_FP32
this->template Run<1, float, float>();
#endif
#ifdef CK_ENABLE_BF16
this->template Run<1, ck::bhalf_t, ck::bhalf_t>();
#endif
#ifdef CK_ENABLE_FP16
this->template Run<1, ck::half_t, ck::half_t>();
#endif
#ifdef CK_ENABLE_INT8
this->template Run<1, int8_t, int8_t>();
#endif
} }
TYPED_TEST(TestImageToColumn2d, Test2D) TYPED_TEST(TestConvTensorRearrange2d, Test2D)
{ {
this->conv_params.clear(); this->conv_params.clear();
...@@ -103,19 +109,45 @@ TYPED_TEST(TestImageToColumn2d, Test2D) ...@@ -103,19 +109,45 @@ TYPED_TEST(TestImageToColumn2d, Test2D)
{2, 1, 4, 1, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}); {2, 1, 4, 1, 192, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back( this->conv_params.push_back(
{2, 1, 64, 1, 64, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}}); {2, 1, 64, 1, 64, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}}); this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {7, 7}, {3, 3}, {1, 1}, {0, 0}, {0, 0}});
this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}}); this->conv_params.push_back({2, 1, 64, 1, 64, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
this->template Run<2>(); this->conv_params.push_back(
{2, 1, 64, 1, 64, {3, 3}, {28, 28}, {2, 2}, {2, 2}, {1, 1}, {1, 1}});
#ifdef CK_ENABLE_FP32
this->template Run<2, float, float>();
#endif
#ifdef CK_ENABLE_BF16
this->template Run<2, ck::bhalf_t, ck::bhalf_t>();
#endif
#ifdef CK_ENABLE_FP16
this->template Run<2, ck::half_t, ck::half_t>();
#endif
#ifdef CK_ENABLE_INT8
this->template Run<2, int8_t, int8_t>();
#endif
} }
// 3D typed test. Resets conv_params, appends the 3D configurations below, then hands
// them to the fixture's Run. Side-by-side diff view: the old side (left) calls Run<3>()
// once; the new side (right) adds one more configuration and calls Run once per data type.
TYPED_TEST(TestImageToColumn3d, Test3D) TYPED_TEST(TestConvTensorRearrange3d, Test3D)
{ {
this->conv_params.clear(); this->conv_params.clear();
this->conv_params.push_back( this->conv_params.push_back(
{3, 1, 16, 1, 64, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}}); {3, 1, 16, 1, 64, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {3, 3, 3}, {0, 0, 0}, {0, 0, 0}});
this->conv_params.push_back( this->conv_params.push_back(
{3, 1, 2, 1, 64, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}}); {3, 1, 2, 1, 64, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->conv_params.push_back( this->conv_params.push_back(
{3, 1, 32, 1, 64, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}}); {3, 1, 32, 1, 64, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->template Run<3>(); this->conv_params.push_back(
{3, 1, 64, 1, 64, {3, 3, 3}, {14, 14, 14}, {2, 2, 2}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}});
// Each data type is exercised only when its CK_ENABLE_* macro is defined.
#ifdef CK_ENABLE_FP32
this->template Run<3, float, float>();
#endif
#ifdef CK_ENABLE_BF16
this->template Run<3, ck::bhalf_t, ck::bhalf_t>();
#endif
#ifdef CK_ENABLE_FP16
this->template Run<3, ck::half_t, ck::half_t>();
#endif
#ifdef CK_ENABLE_INT8
this->template Run<3, int8_t, int8_t>();
#endif
} }
// SPDX-License-Identifier: MIT // SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib> #include <cstdlib>
#include <iostream> #include <iostream>
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp" #include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp"
#include "ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp"
#include "ck/library/utility/convolution_parameter.hpp" #include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/algorithm.hpp" #include "ck/library/utility/algorithm.hpp"
...@@ -18,28 +20,37 @@ ...@@ -18,28 +20,37 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
using DataType = float; using DataType = float;
using InLayout = ck::tensor_layout::convolution::GNWC; using ImLayout = ck::tensor_layout::convolution::GNWC;
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
using namespace ck::conv_tensor_rearrange_op;
template <ck::index_t ScalarPerVector, bool IsCPacked> template <ck::index_t ScalarPerVector, bool IsCPacked>
class TestImageToColumnInterface : public ::testing::Test class TestConvTensorRearrangeInterface : public ::testing::Test
{ {
protected: protected:
static constexpr ck::index_t NDimSpatial = 1; static constexpr ck::index_t NDimSpatial = 1;
// clang-format off // clang-format off
using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl using DeviceImgToColInstance = ck::tensor_operation::device::DeviceImageToColumnImpl
//#####################| Num| InLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar| // Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
//#####################| Dim| | | | Size| Block| Block| Cluster| Per| // Dim| | | | Size| Block| Block| Cluster| Per|
//#####################| Spatial| | | | | | | Lengths| Vector| // Spatial| | | | | | | Lengths| Vector|
//#####################| | | | | | | | | | // | | | | | | | | |
< NDimSpatial, InLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>; < NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
using DeviceColToimgInstance = ck::tensor_operation::device::DeviceColumnToImageImpl
// Num| ImLayout| InDataType| OutDataType| Block| MPer| KPer| Thread| Scalar|
// Dim| | | | Size| Block| Block| Cluster| Per|
// Spatial| | | | | | | Lengths| Vector|
// | | | | | | | | |
< NDimSpatial, ImLayout, DataType, DataType, 256, 128, 128, S<16, 16>,ScalarPerVector>;
// clang-format on // clang-format on
ck::utils::conv::ConvParam conv_param; ck::utils::conv::ConvParam conv_param;
template <typename ConvTensorRearrangeOp>
bool Run() bool Run()
{ {
...@@ -57,10 +68,10 @@ class TestImageToColumnInterface : public ::testing::Test ...@@ -57,10 +68,10 @@ class TestImageToColumnInterface : public ::testing::Test
ck::accumulate_n<ck::index_t>( ck::accumulate_n<ck::index_t>(
conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>()); conv_param.filter_spatial_lengths_.begin(), NDimSpatial, 1, std::multiplies<>());
const auto in_desc = const auto image_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>( ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<ImLayout>(
conv_param); conv_param);
const auto out_desc = HostTensorDescriptor({NDoHoWo, CZYX}); const auto gemm_desc = HostTensorDescriptor({NDoHoWo, CZYX});
std::array<ck::index_t, NDimSpatial> input_spatial_lengths{}; std::array<ck::index_t, NDimSpatial> input_spatial_lengths{};
std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{}; std::array<ck::index_t, NDimSpatial> filter_spatial_lengths{};
...@@ -77,120 +88,173 @@ class TestImageToColumnInterface : public ::testing::Test ...@@ -77,120 +88,173 @@ class TestImageToColumnInterface : public ::testing::Test
copy(conv_param.input_spatial_lengths_, input_spatial_lengths); copy(conv_param.input_spatial_lengths_, input_spatial_lengths);
copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths); copy(conv_param.filter_spatial_lengths_, filter_spatial_lengths);
copy(conv_param.output_spatial_lengths_, output_spatial_lengths); copy(conv_param.output_spatial_lengths_, output_spatial_lengths);
copy(in_desc.GetStrides(), input_g_n_c_wis_strides); copy(image_desc.GetStrides(), input_g_n_c_wis_strides);
copy(out_desc.GetStrides(), output_m_k_strides); copy(gemm_desc.GetStrides(), output_m_k_strides);
copy(conv_param.conv_filter_strides_, conv_filter_strides); copy(conv_param.conv_filter_strides_, conv_filter_strides);
copy(conv_param.conv_filter_dilations_, conv_filter_dilations); copy(conv_param.conv_filter_dilations_, conv_filter_dilations);
copy(conv_param.input_left_pads_, input_left_pads); copy(conv_param.input_left_pads_, input_left_pads);
copy(conv_param.input_right_pads_, input_right_pads); copy(conv_param.input_right_pads_, input_right_pads);
auto img2col = DeviceImgToColInstance{}; if constexpr(std::is_same_v<ConvTensorRearrangeOp, ImageToColumn>)
auto argument = img2col.MakeArgument(nullptr, {
nullptr, auto img2col = DeviceImgToColInstance{};
N, auto argument = img2col.MakeArgument(nullptr,
IsCPacked ? C : FakeC, nullptr,
input_spatial_lengths, N,
filter_spatial_lengths, IsCPacked ? C : FakeC,
output_spatial_lengths, input_spatial_lengths,
input_g_n_c_wis_strides, filter_spatial_lengths,
output_m_k_strides, output_spatial_lengths,
conv_filter_strides, input_g_n_c_wis_strides,
conv_filter_dilations, output_m_k_strides,
input_left_pads, conv_filter_strides,
input_right_pads); conv_filter_dilations,
input_left_pads,
return img2col.IsSupportedArgument(argument); input_right_pads);
return img2col.IsSupportedArgument(argument);
}
else if constexpr(std::is_same_v<ConvTensorRearrangeOp, ColumnToImage>)
{
auto col2img = DeviceColToimgInstance{};
auto argument = col2img.MakeArgument(nullptr,
nullptr,
N,
IsCPacked ? C : FakeC,
input_spatial_lengths,
filter_spatial_lengths,
output_spatial_lengths,
input_g_n_c_wis_strides,
output_m_k_strides,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads);
return col2img.IsSupportedArgument(argument);
}
} }
}; };
// Concrete fixtures for the interface tests; the template arguments are
// <ScalarPerVector, IsCPacked>. Side-by-side diff view: old names left, new names right.
// ScalarPerVector = 1, IsCPacked = true.
class TestImageToColumnInterface1ScalarPerVector : public TestImageToColumnInterface<1, true> class TestConvTensorRearrangeInterface1ScalarPerVector
: public TestConvTensorRearrangeInterface<1, true>
{ {
}; };
// ScalarPerVector = 4, IsCPacked = true.
class TestImageToColumnInterface4ScalarPerVector : public TestImageToColumnInterface<4, true> class TestConvTensorRearrangeInterface4ScalarPerVector
: public TestConvTensorRearrangeInterface<4, true>
{ {
}; };
// ScalarPerVector = 4, IsCPacked = false (the fixture passes FakeC instead of C in that case).
class TestImageToColumnInterface4ScalarPerVectorFakeC : public TestImageToColumnInterface<4, false> class TestConvTensorRearrangeInterface4ScalarPerVectorFakeC
: public TestConvTensorRearrangeInterface<4, false>
{ {
}; };
// Interface test for the ScalarPerVector = 1 fixture: every configuration below is
// expected to be reported as supported by Run. Side-by-side diff view: the old side
// (left) checks a single Run() per configuration; the new side (right) checks
// Run<ImageToColumn>() and Run<ColumnToImage>() for the same configuration.
TEST_F(TestImageToColumnInterface1ScalarPerVector, X1ScalarPerVector) TEST_F(TestConvTensorRearrangeInterface1ScalarPerVector, X1ScalarPerVector)
{ {
// vector load C * X % ScalarPerVector // vector load C * X % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
bool is_supported = this->Run(); bool is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
// vector load C * left_pad_x % ScalarPerVector // vector load C * left_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
// vector load C * right_pad_x % ScalarPerVector // vector load C * right_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
// vector load C % ScalarPerVector, right_pad and stride // vector load C % ScalarPerVector, right_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
// vector load C % ScalarPerVector, left_pad and stride // vector load C % ScalarPerVector, left_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
// vector load C % ScalarPerVector, dilation // vector load C % ScalarPerVector, dilation
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
// C = 4 // C = 4
this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}}; this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
} }
TEST_F(TestImageToColumnInterface4ScalarPerVector, X4ScalarPerVector) TEST_F(TestConvTensorRearrangeInterface4ScalarPerVector, X4ScalarPerVector)
{ {
// vector load C * X % ScalarPerVector // vector load C * X % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {3}, {3}, {1}, {1}, {0}, {0}};
bool is_supported = this->Run(); bool is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// vector load C * left_pad_x % ScalarPerVector // vector load C * left_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {3}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// vector load C * right_pad_x % ScalarPerVector // vector load C * right_pad_x % ScalarPerVector
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {1}, {0}, {3}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// vector load C % ScalarPerVector, right_pad and stride // vector load C % ScalarPerVector, right_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {0}, {3}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// vector load C % ScalarPerVector, left_pad and stride // vector load C % ScalarPerVector, left_pad and stride
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {2}, {1}, {3}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// vector load C % ScalarPerVector, dilation // vector load C % ScalarPerVector, dilation
this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}}; this->conv_param = {1, 1, 1, 1, 1, {4}, {3}, {1}, {2}, {0}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// C = 4 // C = 4
this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}}; this->conv_param = {1, 1, 1, 1, 4, {3}, {3}, {1}, {1}, {3}, {3}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
} }
TEST_F(TestImageToColumnInterface4ScalarPerVectorFakeC, X4ScalarPerVectorFakeC) TEST_F(TestConvTensorRearrangeInterface4ScalarPerVectorFakeC, X4ScalarPerVectorFakeC)
{ {
// C = 3 // C = 3
this->conv_param = {1, 1, 1, 1, 3, {4}, {3}, {1}, {1}, {0}, {0}}; this->conv_param = {1, 1, 1, 1, 3, {4}, {3}, {1}, {1}, {0}, {0}};
bool is_supported = this->Run(); bool is_supported = this->template Run<ImageToColumn>();
EXPECT_FALSE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_FALSE(is_supported); EXPECT_FALSE(is_supported);
// C = 8 // C = 8
this->conv_param = {1, 1, 1, 1, 8, {4}, {3}, {1}, {1}, {0}, {0}}; this->conv_param = {1, 1, 1, 1, 8, {4}, {3}, {1}, {1}, {0}, {0}};
is_supported = this->Run(); is_supported = this->template Run<ImageToColumn>();
EXPECT_TRUE(is_supported);
is_supported = this->template Run<ColumnToImage>();
EXPECT_TRUE(is_supported); EXPECT_TRUE(is_supported);
} }
add_gtest_executable(test_image_to_column test_image_to_column.cpp)
target_link_libraries(test_image_to_column PRIVATE utility device_image_to_column_instance)
add_gtest_executable(test_image_to_column_interface test_image_to_column_interface.cpp)
target_link_libraries(test_image_to_column_interface PRIVATE utility)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment