Commit 5f50ed89 authored by Po-Yen, Chen
Browse files

Add an example demonstrating how to bundle multiple elements in a tensor

parent 82cc8731
...@@ -2,6 +2,8 @@ add_custom_target(example_permute) ...@@ -2,6 +2,8 @@ add_custom_target(example_permute)
add_example_executable(example_permute_1xHxW_fp32 permute_1xHxW_fp32.cpp) add_example_executable(example_permute_1xHxW_fp32 permute_1xHxW_fp32.cpp)
add_example_executable(example_permute_NxHxW_fp32 permute_NxHxW_fp32.cpp) add_example_executable(example_permute_NxHxW_fp32 permute_NxHxW_fp32.cpp)
add_example_executable(example_permute_HxWx4_fp16 permute_HxWx4_fp16.cpp)
add_dependencies(example_permute example_permute_1xHxW_fp32) add_dependencies(example_permute example_permute_1xHxW_fp32)
add_dependencies(example_permute example_permute_NxHxW_fp32) add_dependencies(example_permute example_permute_NxHxW_fp32)
add_dependencies(example_permute example_permute_HxWx4_fp16)
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
using F16 = ck::half_t; using F16 = ck::half_t;
using F32 = float; using F32 = float;
using F64 = double;
struct ExecutionConfig final struct ExecutionConfig final
{ {
...@@ -53,6 +54,36 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough; ...@@ -53,6 +54,36 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
namespace detail { namespace detail {
template <typename Bundle, std::size_t Divisor>
struct get_bundled;
template <typename Bundle>
struct get_bundled<Bundle, 1>
{
using type = Bundle;
};
template <>
struct get_bundled<F64, 2>
{
using type = F32;
};
template <>
struct get_bundled<F64, 4>
{
using type = F16;
};
template <>
struct get_bundled<F32, 2>
{
using type = F16;
};
template <typename Bundle, std::size_t Divisor>
using get_bundled_t = typename get_bundled<Bundle, Divisor>::type;
template <typename T, typename = void> template <typename T, typename = void>
struct is_iterator : std::false_type struct is_iterator : std::false_type
{ {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
// Element types of the input (A) and output (B) tensors for this example.
// NOTE(review): NUM_ELEMS_IN_BUNDLE is set to 4 further down, so each F64
// presumably stands for four bundled smaller elements — confirm against the
// bundling helpers pulled in through common.hpp.
using ADataType = F64;
using BDataType = F64;
// clang-format off
// Device permute instantiation used by this example. Every template argument
// sits on its own line, labelled with the role it plays.
using DevicePermuteInstance = ck::tensor_operation::device::DevicePermute<
    ADataType,    // InData type
    BDataType,    // OutData type
    PassThrough,  // Elementwise operation
    3,            // NumDim
    256,          // Block size
    128,          // HPerBlock
    128,          // WPerBlock
    0,            // InBlock LdsExtraW
    S<1, 16, 16>, // InBlockTransfer ThreadClusterLengths
    S<0, 1, 2>,   // InBlockTransfer ThreadClusterArrangeOrder
    2,            // Src VectorDim
    1,            // Dst VectorDim
    1,            // Src ScalarPerVector
    1>;           // Dst ScalarPerVector
// clang-format on
// Bundle size selected for this example.
// Keep the define ahead of the include: the included code presumably falls
// back to a default when the macro is absent — TODO confirm that
// run_permute_example.inc is the file providing that fallback.
#define NUM_ELEMS_IN_BUNDLE 4
#include "run_permute_example.inc"
// Entry point: hands the command line and the two brace-initialized lists
// {1, 160, 80} and {0, 2, 1} to run_permute_example, then turns a falsy
// result into exit code 1 and a truthy one into exit code 0.
int main(int argc, char* argv[])
{
    const bool failed = !run_permute_example(argc, argv, {1, 160, 80}, {0, 2, 1});
    return failed ? 1 : 0;
}
...@@ -3,6 +3,10 @@ ...@@ -3,6 +3,10 @@
#pragma once #pragma once
// Fallback bundle size: if nothing defined NUM_ELEMS_IN_BUNDLE before this
// point, use 1.
#if !defined(NUM_ELEMS_IN_BUNDLE)
#define NUM_ELEMS_IN_BUNDLE 1
#endif
bool run_permute(const ExecutionConfig& config, const Problem& problem) bool run_permute(const ExecutionConfig& config, const Problem& problem)
{ {
using std::begin, std::end; using std::begin, std::end;
...@@ -14,12 +18,17 @@ bool run_permute(const ExecutionConfig& config, const Problem& problem) ...@@ -14,12 +18,17 @@ bool run_permute(const ExecutionConfig& config, const Problem& problem)
Tensor<ADataType> a(shape); Tensor<ADataType> a(shape);
Tensor<BDataType> b(transposed_shape); Tensor<BDataType> b(transposed_shape);
std::iota(begin(a.mData), end(a.mData), 1); using std::data, std::size;
{
auto* const elems =
reinterpret_cast<detail::get_bundled_t<ADataType, NUM_ELEMS_IN_BUNDLE>*>(data(a.mData));
std::iota(elems, elems + (size(a.mData) * NUM_ELEMS_IN_BUNDLE), 1);
}
DeviceMem a_device_buf(sizeof(ADataType) * a.mDesc.GetElementSpaceSize()); DeviceMem a_device_buf(sizeof(ADataType) * a.mDesc.GetElementSpaceSize());
DeviceMem b_device_buf(sizeof(BDataType) * b.mDesc.GetElementSpaceSize()); DeviceMem b_device_buf(sizeof(BDataType) * b.mDesc.GetElementSpaceSize());
a_device_buf.ToDevice(a.mData.data()); a_device_buf.ToDevice(data(a.mData));
std::array<ck::index_t, 3> a_lengths, b_lengths; std::array<ck::index_t, 3> a_lengths, b_lengths;
std::array<ck::index_t, 3> a_strides, b_strides; std::array<ck::index_t, 3> a_strides, b_strides;
...@@ -55,7 +64,7 @@ bool run_permute(const ExecutionConfig& config, const Problem& problem) ...@@ -55,7 +64,7 @@ bool run_permute(const ExecutionConfig& config, const Problem& problem)
Tensor<BDataType> host_b(transposed_shape); Tensor<BDataType> host_b(transposed_shape);
host_permute(a, problem.axes, PassThrough{}, host_b); host_permute(a, problem.axes, PassThrough{}, host_b);
b_device_buf.FromDevice(b.mData.data()); b_device_buf.FromDevice(data(b.mData));
return ck::utils::check_err( return ck::utils::check_err(
b.mData, host_b.mData, "Error: incorrect results in output tensor", 1e-10, 1e-10); b.mData, host_b.mData, "Error: incorrect results in output tensor", 1e-10, 1e-10);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment