Commit 506df423 authored by Chao Liu's avatar Chao Liu
Browse files

refactor

parent b6bfde53
...@@ -2,9 +2,9 @@ ...@@ -2,9 +2,9 @@
#define CK_DUMMY_DYNAMIC_TRANSFORM_V1_HPP #define CK_DUMMY_DYNAMIC_TRANSFORM_V1_HPP
#include "common_header.hpp" #include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp" #include "dynamic_tensor_descriptor_v1.hpp"
#include "dynamic_tensor_descriptor_helper.hpp" #include "dynamic_tensor_descriptor_helper_v1.hpp"
#include "dynamic_tensor_coordinate.hpp" #include "dynamic_tensor_coordinate_v1.hpp"
namespace ck { namespace ck {
...@@ -565,7 +565,7 @@ struct DummyDynamicTransform_v1 ...@@ -565,7 +565,7 @@ struct DummyDynamicTransform_v1
const index_t niter = p_wei_global[10]; const index_t niter = p_wei_global[10];
auto in_gemmk_gemmn_coord = auto in_gemmk_gemmn_coord =
make_dynamic_tensor_coordinate<2>(in_gemmk_gemmn_global_desc, idx); make_dynamic_tensor_coordinate_v1(in_gemmk_gemmn_global_desc, idx);
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
......
...@@ -2,16 +2,16 @@ ...@@ -2,16 +2,16 @@
#define CK_DUMMY_DYNAMIC_TRANSFORM_V2_HPP #define CK_DUMMY_DYNAMIC_TRANSFORM_V2_HPP
#include "common_header.hpp" #include "common_header.hpp"
#include "dynamic_tensor_descriptor_v2.hpp" #include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper_v2.hpp" #include "dynamic_tensor_descriptor_helper.hpp"
namespace ck { namespace ck {
template <typename... Wei, typename... In, typename... Out> template <typename... Wei, typename... In, typename... Out>
__host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4( __host__ __device__ constexpr auto
const DynamicTensorDescriptor_v2<Wei...>& wei_k_c_y_x_global_desc, map_convolution_into_gemm_fwd_v4r4(const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor_v2<In...>& in_n_c_hi_wi_global_desc, const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor_v2<Out...>& out_n_k_ho_wo_global_desc, const DynamicTensorDescriptor<Out...>& out_n_k_ho_wo_global_desc,
const MultiIndex<2> conv_strides, const MultiIndex<2> conv_strides,
const MultiIndex<2> conv_dilations, const MultiIndex<2> conv_dilations,
const MultiIndex<2> in_left_pads, const MultiIndex<2> in_left_pads,
...@@ -47,8 +47,8 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4( ...@@ -47,8 +47,8 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
const index_t InRightPadW = in_right_pads[I1]; const index_t InRightPadW = in_right_pads[I1];
// input tensor // input tensor
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
transform_dynamic_tensor_descriptor_v2( transform_dynamic_tensor_descriptor(
in_n_c_hi_wi_global_desc, in_n_c_hi_wi_global_desc,
make_tuple(DynamicPassThrough{N}, make_tuple(DynamicPassThrough{N},
DynamicPassThrough{C}, DynamicPassThrough{C},
...@@ -66,7 +66,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4( ...@@ -66,7 +66,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(I2); const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(I2);
const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(I3); const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(I3);
const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
in_n_c_hip_wip_global_desc, in_n_c_hip_wip_global_desc,
make_tuple( make_tuple(
DynamicPassThrough{N}, DynamicPassThrough{N},
...@@ -76,7 +76,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4( ...@@ -76,7 +76,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
const auto in_gemmktotal_gemmn_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_gemmktotal_gemmn_global_desc = transform_dynamic_tensor_descriptor(
in_n_c_y_ho_x_wo_global_desc, in_n_c_y_ho_x_wo_global_desc,
make_tuple(DynamicMerge<3>{make_multi_index(C, Y, X)}, make_tuple(DynamicMerge<3>{make_multi_index(C, Y, X)},
DynamicMerge<3>{make_multi_index(N, Ho, Wo)}), DynamicMerge<3>{make_multi_index(N, Ho, Wo)}),
...@@ -89,7 +89,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4( ...@@ -89,7 +89,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
constexpr index_t GemmKPack = 8; constexpr index_t GemmKPack = 8;
const index_t gemmk = gemmktotal / GemmKPack; const index_t gemmk = gemmktotal / GemmKPack;
const auto in_gemmk_gemmn_gemmkpack_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_gemmk_gemmn_gemmkpack_global_desc = transform_dynamic_tensor_descriptor(
in_gemmktotal_gemmn_global_desc, in_gemmktotal_gemmn_global_desc,
make_tuple(DynamicUnMerge<2>{make_multi_index(gemmk, GemmKPack)}, make_tuple(DynamicUnMerge<2>{make_multi_index(gemmk, GemmKPack)},
DynamicPassThrough{gemmn}), DynamicPassThrough{gemmn}),
...@@ -105,9 +105,9 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4( ...@@ -105,9 +105,9 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
#if 0 #if 0
template <typename... Wei, typename... In, typename... Out> template <typename... Wei, typename... In, typename... Out>
__host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1( __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
const DynamicTensorDescriptor_v2<Wei...>& wei_k_c_y_x_global_desc, const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor_v2<In...>& in_n_c_hi_wi_global_desc, const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor_v2<Out...>& out_n_k_ho_wo_global_desc, const DynamicTensorDescriptor<Out...>& out_n_k_ho_wo_global_desc,
const MultiIndex<2> conv_strides, const MultiIndex<2> conv_strides,
const MultiIndex<2> conv_dilations, const MultiIndex<2> conv_dilations,
const MultiIndex<2> in_left_pads, const MultiIndex<2> in_left_pads,
...@@ -148,7 +148,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1( ...@@ -148,7 +148,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
constexpr bool out_skip_out_of_bound_check = true; constexpr bool out_skip_out_of_bound_check = true;
#endif #endif
constexpr auto out_n_k_ydot_htilda_xdot_wtilda_global_desc = transform_tensor_descriptor_v2( constexpr auto out_n_k_ydot_htilda_xdot_wtilda_global_desc = transform_tensor_descriptor(
out_n_k_ho_wo_global_desc, out_n_k_ho_wo_global_desc,
make_tuple(PassThrough{N}, make_tuple(PassThrough{N},
PassThrough{K}, PassThrough{K},
...@@ -158,7 +158,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1( ...@@ -158,7 +158,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
constexpr auto out_n_k_ydot_htildaslice_xdot_wtildaslice_global_desc = constexpr auto out_n_k_ydot_htildaslice_xdot_wtildaslice_global_desc =
transform_tensor_descriptor_v2( transform_tensor_descriptor(
out_n_k_ydot_htilda_xdot_wtilda_global_desc, out_n_k_ydot_htilda_xdot_wtilda_global_desc,
make_tuple(PassThrough{N}, make_tuple(PassThrough{N},
PassThrough{K}, PassThrough{K},
...@@ -230,15 +230,14 @@ struct DummyDynamicTransform_v2_1 ...@@ -230,15 +230,14 @@ struct DummyDynamicTransform_v2_1
const index_t niter = p_wei_global[10]; const index_t niter = p_wei_global[10];
auto in_gemmk_gemmn_coord = auto in_gemmk_gemmn_coord = make_dynamic_tensor_coordinate(in_gemmk_gemmn_global_desc, idx);
make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_global_desc, idx);
const auto in_gemmk_gemmn_coord_step = make_dynamic_tensor_coordinate_step_v2( const auto in_gemmk_gemmn_coord_step =
in_gemmk_gemmn_global_desc, make_multi_index(1, 0)); make_dynamic_tensor_coordinate_step(in_gemmk_gemmn_global_desc, make_multi_index(1, 0));
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
move_dynamic_tensor_coordinate_v2( move_dynamic_tensor_coordinate(
in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord, in_gemmk_gemmn_coord_step); in_gemmk_gemmn_global_desc, in_gemmk_gemmn_coord, in_gemmk_gemmn_coord_step);
// write // write
...@@ -308,7 +307,7 @@ struct DummyDynamicTransform_v2_1 ...@@ -308,7 +307,7 @@ struct DummyDynamicTransform_v2_1
const index_t InRightPadW = in_right_pads[i1]; const index_t InRightPadW = in_right_pads[i1];
#if 0 #if 0
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
move(in_n_c_hi_wi_global_desc), move(in_n_c_hi_wi_global_desc),
make_tuple(DynamicPassThrough{N}, make_tuple(DynamicPassThrough{N},
DynamicPassThrough{C}, DynamicPassThrough{C},
...@@ -317,7 +316,7 @@ struct DummyDynamicTransform_v2_1 ...@@ -317,7 +316,7 @@ struct DummyDynamicTransform_v2_1
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
#elif 0 #elif 0
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
move(in_n_c_hi_wi_global_desc), move(in_n_c_hi_wi_global_desc),
make_tuple(DynamicPassThrough{N}, make_tuple(DynamicPassThrough{N},
DynamicPassThrough{C}, DynamicPassThrough{C},
...@@ -326,8 +325,8 @@ struct DummyDynamicTransform_v2_1 ...@@ -326,8 +325,8 @@ struct DummyDynamicTransform_v2_1
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{})); make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
#else #else
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor_v2( const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
transform_dynamic_tensor_descriptor_v2( transform_dynamic_tensor_descriptor(
move(in_n_c_hi_wi_global_desc), move(in_n_c_hi_wi_global_desc),
make_tuple(DynamicPassThrough{N}, make_tuple(DynamicPassThrough{N},
DynamicPassThrough{C}, DynamicPassThrough{C},
...@@ -351,14 +350,14 @@ struct DummyDynamicTransform_v2_1 ...@@ -351,14 +350,14 @@ struct DummyDynamicTransform_v2_1
#if 1 #if 1
const index_t niter = p_wei_global[10]; const index_t niter = p_wei_global[10];
auto in_coord = make_dynamic_tensor_coordinate_v2(in_n_c_hip_wip_global_desc, idx); auto in_coord = make_dynamic_tensor_coordinate(in_n_c_hip_wip_global_desc, idx);
const auto in_coord_step = make_dynamic_tensor_coordinate_step_v2( const auto in_coord_step = make_dynamic_tensor_coordinate_step(
in_n_c_hip_wip_global_desc, make_multi_index(1, 0, 0, 0)); in_n_c_hip_wip_global_desc, make_multi_index(1, 0, 0, 0));
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
move_dynamic_tensor_coordinate_v2(in_n_c_hip_wip_global_desc, in_coord, in_coord_step); move_dynamic_tensor_coordinate(in_n_c_hip_wip_global_desc, in_coord, in_coord_step);
// write // write
float value = 1; float value = 1;
...@@ -381,7 +380,7 @@ struct DummyDynamicTransform_v2_1 ...@@ -381,7 +380,7 @@ struct DummyDynamicTransform_v2_1
} }
#else #else
// write // write
// auto in_coord = make_dynamic_tensor_coordinate_v2(in_n_c_hi_wi_global_desc, idx); // auto in_coord = make_dynamic_tensor_coordinate(in_n_c_hi_wi_global_desc, idx);
p_out_global[in_n_c_hip_wip_global_desc.CalculateOffset(idx)] = 1; p_out_global[in_n_c_hip_wip_global_desc.CalculateOffset(idx)] = 1;
#endif #endif
...@@ -429,24 +428,21 @@ struct DummyDynamicTransform_v2_fwd_v4r4 ...@@ -429,24 +428,21 @@ struct DummyDynamicTransform_v2_fwd_v4r4
const index_t niter = p_wei_global[10]; const index_t niter = p_wei_global[10];
auto in_gemmk_gemmn_gemmkpack_coord = auto in_gemmk_gemmn_gemmkpack_coord =
make_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, idx); make_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc, idx);
const auto in_gemmk_gemmn_gemmkpack_coord_step_0_0_1 = const auto in_gemmk_gemmn_gemmkpack_coord_step_0_0_1 = make_dynamic_tensor_coordinate_step(
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_gemmkpack_global_desc, in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 1));
make_multi_index(0, 0, 1));
const auto in_gemmk_gemmn_gemmkpack_coord_step_0_1_0 = const auto in_gemmk_gemmn_gemmkpack_coord_step_0_1_0 = make_dynamic_tensor_coordinate_step(
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_gemmkpack_global_desc, in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 1, 0));
make_multi_index(0, 1, 0));
const auto in_gemmk_gemmn_gemmkpack_coord_step_1_0_0 = const auto in_gemmk_gemmn_gemmkpack_coord_step_1_0_0 = make_dynamic_tensor_coordinate_step(
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_gemmkpack_global_desc, in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(1, 0, 0));
make_multi_index(1, 0, 0));
// move (0, 0, 1) // move (0, 0, 1)
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, move_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc,
in_gemmk_gemmn_gemmkpack_coord, in_gemmk_gemmn_gemmkpack_coord,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1); in_gemmk_gemmn_gemmkpack_coord_step_0_0_1);
...@@ -478,7 +474,7 @@ struct DummyDynamicTransform_v2_fwd_v4r4 ...@@ -478,7 +474,7 @@ struct DummyDynamicTransform_v2_fwd_v4r4
// move (0, 1, 0) // move (0, 1, 0)
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, move_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc,
in_gemmk_gemmn_gemmkpack_coord, in_gemmk_gemmn_gemmkpack_coord,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0); in_gemmk_gemmn_gemmkpack_coord_step_0_1_0);
...@@ -510,7 +506,7 @@ struct DummyDynamicTransform_v2_fwd_v4r4 ...@@ -510,7 +506,7 @@ struct DummyDynamicTransform_v2_fwd_v4r4
// move (1, 0, 0) // move (1, 0, 0)
for(index_t iter = 0; iter < niter; ++iter) for(index_t iter = 0; iter < niter; ++iter)
{ {
move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, move_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc,
in_gemmk_gemmn_gemmkpack_coord, in_gemmk_gemmn_gemmkpack_coord,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0); in_gemmk_gemmn_gemmkpack_coord_step_1_0_0);
......
#ifndef CK_DYNAMIC_TENSOR_COORDINATE_HPP #ifndef CK_DYNAMIC_TENSOR_COORDINATE_V1_HPP
#define CK_DYNAMIC_TENSOR_COORDINATE_HPP #define CK_DYNAMIC_TENSOR_COORDINATE_V1_HPP
#include "common_header.hpp" #include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp" #include "dynamic_tensor_descriptor_v1.hpp"
namespace ck { namespace ck {
...@@ -19,20 +19,20 @@ namespace ck { ...@@ -19,20 +19,20 @@ namespace ck {
// 1. Given step size in each dimension, update itself, or return a new tensor cooridnate, so user // 1. Given step size in each dimension, update itself, or return a new tensor cooridnate, so user
// can freely move the "point of location" inside the tensor // can freely move the "point of location" inside the tensor
// wrapper class for DynamicNativeTensorCoordinate and DynamicTransformedTensorCoordinate // wrapper class for DynamicNativeTensorCoordinate_v1 and DynamicTransformedTensorCoordinate_v1
template <typename TensorDesc> template <typename TensorDesc>
struct DynamicTensorCoordinate; struct DynamicTensorCoordinate_v1;
// tensor coordinate for native tensor // tensor coordinate for native tensor
template <typename TensorDesc> template <typename TensorDesc>
struct DynamicNativeTensorCoordinate struct DynamicNativeTensorCoordinate_v1
{ {
using type = DynamicNativeTensorCoordinate; using type = DynamicNativeTensorCoordinate_v1;
using tensor_desc_type = TensorDesc; using tensor_desc_type = TensorDesc;
static constexpr index_t NDim = tensor_desc_type::GetNumOfDimension(); static constexpr index_t NDim = tensor_desc_type::GetNumOfDimension();
using Index = MultiIndex<NDim>; using Index = MultiIndex<NDim>;
__host__ __device__ explicit constexpr DynamicNativeTensorCoordinate( __host__ __device__ explicit constexpr DynamicNativeTensorCoordinate_v1(
const tensor_desc_type& tensor_desc, const Index& idx) const tensor_desc_type& tensor_desc, const Index& idx)
: tensor_desc_{tensor_desc}, idx_{idx}, offset_{tensor_desc.CalculateOffset(idx)} : tensor_desc_{tensor_desc}, idx_{idx}, offset_{tensor_desc.CalculateOffset(idx)}
{ {
...@@ -118,17 +118,17 @@ struct DynamicNativeTensorCoordinate ...@@ -118,17 +118,17 @@ struct DynamicNativeTensorCoordinate
// tensor coordinate for transformed tensor // tensor coordinate for transformed tensor
template <typename TensorDesc> template <typename TensorDesc>
struct DynamicTransformedTensorCoordinate struct DynamicTransformedTensorCoordinate_v1
{ {
static constexpr index_t NDimUp = TensorDesc::GetNumOfDimension(); static constexpr index_t NDimUp = TensorDesc::GetNumOfDimension();
using UpperDesc = TensorDesc; using UpperDesc = TensorDesc;
using UpperCoord = DynamicTransformedTensorCoordinate; using UpperCoord = DynamicTransformedTensorCoordinate_v1;
using UpperIndex = MultiIndex<NDimUp>; using UpperIndex = MultiIndex<NDimUp>;
using LowerDesc = typename UpperDesc::LowerDesc; using LowerDesc = typename UpperDesc::LowerDesc;
using LowerCoord = typename DynamicTensorCoordinate<LowerDesc>::type; using LowerCoord = typename DynamicTensorCoordinate_v1<LowerDesc>::type;
__host__ __device__ explicit constexpr DynamicTransformedTensorCoordinate( __host__ __device__ explicit constexpr DynamicTransformedTensorCoordinate_v1(
const UpperDesc& tensor_desc_up, const UpperIndex& idx_up) const UpperDesc& tensor_desc_up, const UpperIndex& idx_up)
: tensor_desc_up_{tensor_desc_up}, : tensor_desc_up_{tensor_desc_up},
idx_up_{idx_up}, idx_up_{idx_up},
...@@ -240,30 +240,32 @@ struct DynamicTransformedTensorCoordinate ...@@ -240,30 +240,32 @@ struct DynamicTransformedTensorCoordinate
template <index_t NDim> template <index_t NDim>
__host__ __device__ constexpr auto __host__ __device__ constexpr auto
make_dynamic_tensor_coordinate(const DynamicNativeTensorDescriptor<NDim>& tensor_desc, make_dynamic_tensor_coordinate_v1(const DynamicNativeTensorDescriptor_v1<NDim>& tensor_desc,
const MultiIndex<NDim>& idx) const MultiIndex<NDim>& idx)
{ {
return DynamicNativeTensorCoordinate<DynamicNativeTensorDescriptor<NDim>>{tensor_desc, idx}; return DynamicNativeTensorCoordinate_v1<DynamicNativeTensorDescriptor_v1<NDim>>{tensor_desc,
idx};
} }
template <index_t NDim, typename... Ts> template <index_t NDim, typename... Ts>
__host__ __device__ constexpr auto __host__ __device__ constexpr auto
make_dynamic_tensor_coordinate(const DynamicTransformedTensorDescriptor<Ts...>& tensor_desc, make_dynamic_tensor_coordinate_v1(const DynamicTransformedTensorDescriptor_v1<Ts...>& tensor_desc,
const MultiIndex<NDim>& idx) const MultiIndex<NDim>& idx)
{ {
static_assert(DynamicTransformedTensorDescriptor<Ts...>::GetNumOfDimension() == NDim, static_assert(DynamicTransformedTensorDescriptor_v1<Ts...>::GetNumOfDimension() == NDim,
"wrong! inconsistent # of dimensions"); "wrong! inconsistent # of dimensions");
return DynamicTransformedTensorCoordinate<DynamicTransformedTensorDescriptor<Ts...>>{ return DynamicTransformedTensorCoordinate_v1<DynamicTransformedTensorDescriptor_v1<Ts...>>{
tensor_desc, idx}; tensor_desc, idx};
} }
template <typename TensorDesc> template <typename TensorDesc>
struct DynamicTensorCoordinate struct DynamicTensorCoordinate_v1
{ {
static constexpr index_t NDim = TensorDesc::GetNumOfDimension(); static constexpr index_t NDim = TensorDesc::GetNumOfDimension();
using type = decltype(make_dynamic_tensor_coordinate<NDim>(TensorDesc{}, MultiIndex<NDim>{})); using type =
decltype(make_dynamic_tensor_coordinate_v1<NDim>(TensorDesc{}, MultiIndex<NDim>{}));
}; };
} // namespace ck } // namespace ck
......
...@@ -6,336 +6,607 @@ ...@@ -6,336 +6,607 @@
namespace ck { namespace ck {
template <index_t NDim> template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicNativeTensorDescriptor struct DynamicTensorCoordinate;
template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep;
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto make_dynamic_tensor_coordinate(const TensorDesc& tensor_desc,
const VisibleIndex& idx_visible);
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_diff_visible);
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ void move_dynamic_tensor_coordinate(const TensorDesc& tensor_desc,
TensorCoord& coord,
const TensorCoordStep& coord_step);
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool
coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc& tensor_desc,
const TensorCoord& coord);
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
const TensorCoord& coord);
// Transforms: Tuple<transforms...>
// LowerDimensionIdss : Tuple<Sequence<...>, ...>
// UpperDimensionIdss : Tuple<Sequence<...>, ...>
// VisibleDimensionIds> : Sequence<...>
template <typename Transforms,
typename LowerDimensionIdss,
typename UpperDimensionIdss,
typename VisibleDimensionIds>
struct DynamicTensorDescriptor
{ {
using Index = MultiIndex<NDim>; // private:
__host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }
const Index lengths_; __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
const Index strides_;
__host__ __device__ explicit constexpr DynamicNativeTensorDescriptor(const Index& lengths,
const Index& strides)
: lengths_{lengths}, strides_{strides}
{ {
return VisibleDimensionIds::Size();
} }
__host__ __device__ explicit constexpr DynamicNativeTensorDescriptor() __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
: lengths_{make_zero_multi_index<NDim>()}, strides_{make_zero_multi_index<NDim>()}
{ {
} constexpr auto all_low_dim_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
LowerDimensionIdss{});
__host__ __device__ static constexpr index_t GetNumOfDimension() { return NDim; } constexpr auto all_up_dim_ids =
unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
UpperDimensionIdss{});
__host__ __device__ constexpr auto GetLengths() const { return lengths_; } constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
__host__ __device__ constexpr auto GetStrides() const { return strides_; } using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
math::less<index_t>,
math::equal<index_t>>::type;
template <index_t IDim> return unique_sort_all_dim_ids::Size();
__host__ __device__ constexpr index_t GetLength(Number<IDim>) const
{
return lengths_[Number<IDim>{}];
} }
template <index_t IDim> constexpr static index_t ntransform_ = GetNumOfTransform();
__host__ __device__ constexpr index_t GetStride(Number<IDim>) const constexpr static index_t ndim_visible_ = GetNumOfVisibleDimension();
constexpr static index_t ndim_hidden_ = GetNumOfHiddenDimension();
using VisibleIndex = MultiIndex<ndim_visible_>;
using HiddenIndex = MultiIndex<ndim_hidden_>;
using Coordinate = DynamicTensorCoordinate<ndim_hidden_, VisibleDimensionIds>;
using CoordinateStep = DynamicTensorCoordinateStep<ntransform_, ndim_visible_>;
// public:
__host__ __device__ explicit constexpr DynamicTensorDescriptor(const Transforms& transforms,
index_t element_space_size)
: transforms_{transforms},
hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)},
visible_lengths_{hidden_lengths_}
{ {
return strides_[Number<IDim>{}]; static_assert(Transforms::Size() == ntransform_ &&
LowerDimensionIdss::Size() == ntransform_ &&
UpperDimensionIdss::Size() == ntransform_,
"wrong! inconsistent # of transformations");
// TODO check dependency of dimensions is valid
} }
__host__ __device__ constexpr index_t GetElementSize() const __host__ __device__ explicit constexpr DynamicTensorDescriptor()
: DynamicTensorDescriptor(Transforms{}, index_t{0})
{ {
return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
} }
__host__ __device__ constexpr index_t GetElementSpace() const __host__ __device__ static constexpr index_t GetNumOfDimension()
{ {
index_t space = 1; return GetNumOfVisibleDimension();
static_for<0, NDim, 1>{}([&](auto i) { space += (GetLength(i) - 1) * GetStride(i); });
return space;
} }
template <typename Idx> template <index_t IDim>
__host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
{ {
index_t offset = 0; return visible_lengths_[Number<IDim>{}];
}
static_for<0, NDim, 1>{}([&](auto i) { offset += idx[i] * GetStride(i); }); __host__ __device__ constexpr const auto& GetLengths() const { return visible_lengths_; }
return offset; // maybe this result should be saved as a member variable
__host__ __device__ constexpr index_t GetElementSize() const
{
return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
} }
template <typename IdxDiff> __host__ __device__ constexpr index_t GetElementSpaceSize() const
__host__ __device__ constexpr index_t CalculateOffsetDiff(const IdxDiff& idx_diff) const
{ {
return CalculateOffset(idx_diff); return hidden_lengths_[Number<0>{}];
} }
template <typename Idx> template <typename Idx>
__host__ __device__ constexpr bool IsUpperIndexValid(const Idx& idx) const __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
{ {
bool flag = true; static_assert(Idx::Size() == GetNumOfDimension(), "wrong! inconsistent # of dimension");
static_for<0, NDim, 1>{}(
[&](auto i) { flag = flag && idx[i] >= 0 && idx[i] < GetLength(i); });
return flag; #if 0 // debug
} return make_dynamic_tensor_coordinate(*this, idx).GetOffset();
}; #else
constexpr index_t ntransform = GetNumOfTransform();
constexpr index_t ndim_hidden = GetNumOfHiddenDimension();
constexpr index_t ndim_visible = GetNumOfVisibleDimension();
constexpr auto visible_dim_ids = GetVisibleDimensionIds();
template <typename LowTensorDescriptor, // DynamicNativeTensorDescriptor or MultiIndex<ndim_hidden> idx_hidden;
// DynamicTransformedTensorDescriptor
typename Transforms, // Tuple<MultIndexTransforms...>
typename LowDimensionIds, // Tuple<Sequence<...>>
typename UpDimensionIds> // Tuple<Sequence<...>>
struct DynamicTransformedTensorDescriptor
{
using LowerDesc = LowTensorDescriptor;
using UpperDesc = DynamicTransformedTensorDescriptor;
static constexpr index_t NTransform = Transforms::Size(); // initialize visible index
const LowerDesc low_tensor_desc_; auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);
const Transforms transforms_; idx_hidden_pick_visible = idx;
__host__ __device__ static constexpr index_t GetNumOfLowerDimension() // calculate hidden index
{ static_for<ntransform - 1, -1, -1>{}([this, &idx_hidden](auto itran) {
return LowerDesc::GetNumOfDimension(); const auto& tran = this->GetTransforms().At(itran);
} constexpr auto dims_low = GetLowerDimensionIdss().At(itran);
constexpr auto dims_up = GetUpperDimensionIdss().At(itran);
__host__ __device__ static constexpr index_t GetNumOfUpperDimension() const auto idx_up = pick_container_element(idx_hidden, dims_up);
{ auto idx_low = pick_container_element(idx_hidden, dims_low);
index_t ndim_up = 0;
static_for<0, NTransform, 1>{}([&](auto i) constexpr { tran.CalculateLowerIndex(idx_low, idx_up);
constexpr auto tmp = UpDimensionIds{}.At(i);
ndim_up += decltype(tmp)::Size();
}); });
return ndim_up; return idx_hidden[Number<0>{}];
#endif
} }
static constexpr index_t NDimUp = GetNumOfUpperDimension(); // private:
static constexpr index_t NDimLow = GetNumOfLowerDimension(); __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }
using UpperIndex = MultiIndex<NDimUp>;
using LowerIndex = MultiIndex<NDimLow>;
struct lambda_merge_sequences __host__ __device__ static constexpr auto GetLowerDimensionIdss()
{
template <typename... Xs>
__host__ __device__ constexpr auto operator()(Xs... xs) const
{ {
return merge_sequences(xs...); return LowerDimensionIdss{};
} }
};
struct lambda_merge_arrays __host__ __device__ static constexpr auto GetUpperDimensionIdss()
{ {
template <typename... Xs> return UpperDimensionIdss{};
__host__ __device__ constexpr auto operator()(Xs... xs) const }
__host__ __device__ static constexpr auto GetVisibleDimensionIds()
{ {
return container_cat(xs...); return VisibleDimensionIds{};
} }
};
__host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor( __host__ __device__ static constexpr auto InitializeHiddenLengths(const Transforms& transforms,
const LowerDesc& low_tensor_desc, const Transforms& transforms) index_t element_space_size)
: low_tensor_desc_{low_tensor_desc}, transforms_{transforms}
{ {
static_assert(NTransform == Transforms::Size() && NTransform == LowDimensionIds::Size() && // zero initialization
NTransform == UpDimensionIds::Size(), HiddenIndex hidden_lengths = make_zero_multi_index<ndim_hidden_>();
"wrong! # of transformations not the same");
// sanity check: // this is the orignal tensor element space size
// LowDimensionIds should include all low-dimensions, hidden_lengths(Number<0>{}) = element_space_size;
// UpDimensionIds should include all up-dimensions
using unsorted_up_dimension_ids =
decltype(unpack(lambda_merge_sequences{}, UpDimensionIds{}));
using sorted_up_dimension_ids = // lengths for all other hidden dimensions
typename sequence_sort<unsorted_up_dimension_ids, math::less<index_t>>::type; static_for<0, ntransform_, 1>{}([&transforms, &hidden_lengths](auto itran) {
const auto& tran = transforms.At(itran);
static_assert(sorted_up_dimension_ids::Size() == NDimUp && constexpr auto up_dim_ids = UpperDimensionIdss{}.At(itran);
is_valid_sequence_map<sorted_up_dimension_ids>{},
"wrong! UpDimensionIds is not configured correctly");
using unsorted_low_dimension_ids = // lengths_hidden_pick_up contains a reference to lengths_hidden
decltype(unpack(lambda_merge_sequences{}, LowDimensionIds{})); auto hidden_lengths_pick_up = pick_container_element(hidden_lengths, up_dim_ids);
using sorted_low_dimension_ids = hidden_lengths_pick_up = tran.GetUpperLengths();
typename sequence_sort<unsorted_low_dimension_ids, math::less<index_t>>::type; });
static_assert(sorted_low_dimension_ids::Size() == NDimLow && return hidden_lengths;
is_valid_sequence_map<sorted_low_dimension_ids>{}, }
"wrong! LowDimensionIds is not configured correctly");
// TODO: sanity check: while a up-dimension could be associated with // private member variables
// multille const Transforms transforms_;
// transformation, a low-dimension should be associated with only one // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
// transformation // variable lengths_) to save space on stack?
const HiddenIndex hidden_lengths_;
// visible_lenths_ contains a reference to hidden_lengths_
const ContainerElementPicker<const HiddenIndex, VisibleDimensionIds> visible_lengths_;
#if 0
// friend class
friend Coordinate;
friend CoordinateStep;
// friend function to transform tensor descriptor
template <typename OldTensorDescriptor,
typename NewTransforms,
typename NewLowerDimensionOldVisibleIdss,
typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ friend constexpr auto
transform_dynamic_tensor_descriptor(const OldTensorDescriptor& /* old_tensor_desc */,
const NewTransforms& /* new_transforms */,
NewLowerDimensionOldVisibleIdss,
NewUpperDimensionNewVisibleIdss);
// friend functions for making and moving tensor coordinate
template <typename VisibleIndex>
__host__ __device__ friend constexpr Coordinate
make_dynamic_tensor_coordinate(const DynamicTensorDescriptor& /* tensor_desc */,
const VisibleIndex& /* idx_visible */);
template <typename VisibleIndex>
__host__ __device__ friend constexpr CoordinateStep
make_dynamic_tensor_coordinate_step(const DynamicTensorDescriptor& /* tensor_desc */,
const VisibleIndex& /* idx_diff_visible */);
__host__ __device__ friend void
move_dynamic_tensor_coordinate(const DynamicTensorDescriptor& /* tensor_desc */,
Coordinate& /* coord */,
const CoordinateStep& /* coord_step */);
// friend functions for valid offset check
__host__ __device__ friend constexpr bool
coordinate_has_valid_offset_assuming_visible_index_is_valid(
const DynamicTensorDescriptor& tensor_desc, const Coordinate& coord);
__host__ __device__ friend constexpr bool
coordinate_has_valid_offset(const DynamicTensorDescriptor& tensor_desc,
const Coordinate& coord);
#endif
};
// TODO: sanity-check: GetLowerLengths of each transform should be template <index_t NDimHidden, typename VisibleDimensionIds>
// consistent with lengths struct DynamicTensorCoordinate
// of lower-tensor-descriptor {
} // private:
static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();
__host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor() using HiddenIndex = MultiIndex<NDimHidden>;
: low_tensor_desc_{}, transforms_{} using VisibleIndex = MultiIndex<ndim_visible_>;
{
}
__host__ __device__ static constexpr index_t GetNumOfDimension() // public:
__host__ __device__ explicit constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden)
: idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_}
{ {
return GetNumOfUpperDimension();
} }
__host__ __device__ constexpr auto GetUpperLengths() const __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }
{
// sort upper-dimension-ids
constexpr auto unsorted_up_dimension_ids =
unpack(lambda_merge_sequences{}, UpDimensionIds{});
using sort_up_dimension_ids = sequence_unique_sort<decltype(unsorted_up_dimension_ids), __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }
math::less<index_t>,
math::equal<index_t>>;
constexpr auto sorted2unsorted_map = typename sort_up_dimension_ids::sorted2unsorted_map{}; // private:
__host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }
// sort upper-lengths __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }
const auto tuple_of_up_lengths =
transform_tuples([](const auto& tran) constexpr { return tran.GetUpperLengths(); },
transforms_);
const auto unsorted_up_lengths = unpack(lambda_merge_arrays{}, tuple_of_up_lengths); __host__ __device__ constexpr const auto& GetVisibleIndex() const { return idx_visible_; }
const auto sorted_up_lengths = __host__ __device__ auto& GetVisibleIndex() { return idx_visible_; }
container_reorder_given_new2old(unsorted_up_lengths, sorted2unsorted_map);
return sorted_up_lengths; // private member variables
} HiddenIndex idx_hidden_;
// idx_visible_ contains a reference to idx_hidden_
ContainerElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;
__host__ __device__ constexpr auto GetLengths() const { return GetUpperLengths(); } #if 0
// friend functions for making and updating tensor coordinate
template <typename TensorDesc>
__host__ __device__ friend constexpr DynamicTensorCoordinate
make_dynamic_tensor_coordinate(const TensorDesc& /* tensor_desc */,
const VisibleIndex& /* idx_visible */);
template <index_t IDim> template <typename TensorDesc, typename TensorCoordStep>
__host__ __device__ constexpr index_t GetLength(Number<IDim>) const __host__ __device__ friend void move_dynamic_tensor_coordinate(
const TensorDesc& /* tensor_desc */,
DynamicTensorCoordinate& /* coord */,
const TensorCoordStep& /* coord_step */);
#endif
};
template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep
{
// private:
using VisibleIndex = MultiIndex<NDimVisible>;
// public:
__host__ __device__ explicit constexpr DynamicTensorCoordinateStep(
const VisibleIndex& idx_diff_visible, const Array<bool, NTransform>& do_transforms)
: idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
{ {
return GetLengths()[Number<IDim>{}];
} }
__host__ __device__ constexpr index_t GetElementSize() const __host__ __device__ constexpr const auto& GetIndexDiff() const { return GetVisibleIndexDiff(); }
// private:
__host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
{ {
return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1}); return idx_diff_visible_;
} }
__host__ __device__ constexpr index_t GetElementSpace() const // private:
const VisibleIndex idx_diff_visible_;
const Array<bool, NTransform> do_transforms_;
#if 0
// friend functions for updating tensor coordinate
template <typename TensorDesc>
__host__ __device__ friend constexpr DynamicTensorCoordinateStep
make_dynamic_tensor_coordinate_step(const TensorDesc& /* tensor_desc */,
const VisibleIndex& /* idx_visible */);
template <typename TensorDesc, index_t NDimHidden, typename VisibleDimensionIds>
__host__ __device__ friend void move_dynamic_tensor_coordinate(
const TensorDesc& /* tensor_desc */,
DynamicTensorCoordinate<NDimHidden, VisibleDimensionIds>& /* coord */,
const DynamicTensorCoordinateStep& /* coord_step */);
#endif
};
// TODO: How to fix this? It uses an struct instead of lambda because lambda
// doesn't have constructor, and to put it outside the scope where it is used
// (transform_dynamic_tensor_descriptor) because template cannot be defined inside a function
// template
template <typename NewTransforms>
struct lambda_get_up_dim_num
{
template <typename I>
__host__ __device__ constexpr auto operator()(I) const
{ {
return low_tensor_desc_.GetElementSpace(); using Tran = remove_reference_t<decltype(NewTransforms{}.At(I{}))>;
return Number<Tran::GetNumOfUpperDimension()>{};
} }
};
template <typename OldTensorDescriptor,
typename NewTransforms,
typename NewLowerDimensionOldVisibleIdss,
typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
const NewTransforms& new_transforms,
NewLowerDimensionOldVisibleIdss,
NewUpperDimensionNewVisibleIdss)
{
// lower dimension's hidden idss
// convert lower dimension visible idss (tuple of sequences) to hidden idss (tuple of
// sequences)
constexpr auto low_dim_hidden_idss = transform_tuples(
// convert lower dimension visible ids (a sequence) to hidden ids (a sequence)
[](auto low_dim_visible_ids) constexpr {
return transform_sequences(
// convert lower dimension visible id to hidden id
[](auto low_dim_visible_id) constexpr {
return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
},
low_dim_visible_ids);
},
NewLowerDimensionOldVisibleIdss{});
constexpr index_t num_new_transform = NewTransforms::Size();
// upper dimension's hidden idss
constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();
constexpr auto up_dim_numbers =
generate_sequence(lambda_get_up_dim_num<NewTransforms>{}, Number<num_new_transform>{});
constexpr auto up_dim_numbers_scan = merge_sequences(
Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));
constexpr auto up_dim_hidden_idss =
generate_tuple([ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
return
typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
old_hidden_dim_number + up_dim_numbers_scan[i + 1],
1>::type{};
},
Number<num_new_transform>{});
// new visible dimension's hidden ids
constexpr auto unordered_new_visible_dim_hidden_ids =
unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);
constexpr auto new_visible_dim_unordered2ordered = unpack(
[](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});
constexpr auto new_visible_dim_hidden_ids =
unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
// put everything together
const auto all_transforms = container_cat(old_tensor_desc.GetTransforms(), new_transforms);
constexpr auto all_low_dim_hidden_idss =
container_cat(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);
constexpr auto all_up_dim_hidden_idss =
container_cat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);
return DynamicTensorDescriptor<decltype(all_transforms),
decltype(all_low_dim_hidden_idss),
decltype(all_up_dim_hidden_idss),
decltype(new_visible_dim_hidden_ids)>{
all_transforms, old_tensor_desc.GetElementSpaceSize()};
}
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto make_dynamic_tensor_coordinate(const TensorDesc& tensor_desc,
const VisibleIndex& idx_visible)
{
static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
"wrong! # of dimension inconsistent");
__host__ __device__ constexpr auto GetLowerTensorDescriptor() const { return low_tensor_desc_; } constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();
template <typename LowIdx, typename UpIdx> MultiIndex<ndim_hidden> idx_hidden;
__host__ __device__ void CalculateLowerIndex(LowIdx& idx_low, const UpIdx& idx_up) const
{ // initialize visible index
static_for<0, NTransform, 1>{}([&](auto itran) constexpr { auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);
const auto tran = transforms_.At(itran); idx_hidden_pick_visible = idx_visible;
// calculate hidden index
static_for<ntransform, 0, -1>{}([&tensor_desc, &idx_hidden](auto itran_p1) {
auto itran = itran_p1 - Number<1>{};
const auto& tran = tensor_desc.GetTransforms().At(itran);
constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);
const auto idx_up_part = pick_container_element(idx_up, UpDimensionIds{}.At(itran)); const auto idx_up = pick_container_element(idx_hidden, dims_up);
auto idx_low_part = pick_container_element(idx_low, LowDimensionIds{}.At(itran)); auto idx_low = pick_container_element(idx_hidden, dims_low);
tran.CalculateLowerIndex(idx_low_part, idx_up_part); tran.CalculateLowerIndex(idx_low, idx_up);
}); });
}
template <typename LowIdxDiff, typename UpIdxDiff, typename LowIdx, typename UpIdx> // better to use std::move?
__host__ __device__ void CalculateLowerIndexDiff(LowIdxDiff& idx_low_diff, return DynamicTensorCoordinate<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
const UpIdxDiff& idx_up_diff, }
const LowIdx& idx_low_old,
const UpIdx& idx_up_old) const
{
static_for<0, NTransform, 1>{}([&](auto itran) {
const auto tran = transforms_.At(itran);
const auto idx_up_diff_part = template <typename TensorDesc, typename VisibleIndex>
pick_container_element(idx_up_diff, UpDimensionIds{}.At(itran)); __host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_diff_visible)
{
static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
"wrong! # of dimension inconsistent");
const auto idx_up_old_part = constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
pick_container_element(idx_up_old, UpDimensionIds{}.At(itran)); constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();
const auto idx_low_old_part = Array<bool, ntransform> do_transforms{false};
pick_container_element(idx_low_old, LowDimensionIds{}.At(itran));
auto idx_low_diff_part = Array<bool, ndim_hidden> non_zero_diff{false};
pick_container_element(idx_low_diff, LowDimensionIds{}.At(itran));
tran.CalculateLowerIndexDiff( auto non_zero_diff_pick_visible = pick_container_element(non_zero_diff, visible_dim_ids);
idx_low_diff_part, idx_up_diff_part, idx_low_old_part, idx_up_old_part);
static_for<0, ndim_visible, 1>{}([&non_zero_diff_pick_visible, &idx_diff_visible](auto i) {
non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0);
}); });
}
template <typename UpIdx> static_for<ntransform - 1, -1, -1>{}([&do_transforms, &non_zero_diff](auto itran) {
__host__ __device__ constexpr auto CalculateLowerIndex(const UpIdx& idx_up) const constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
{ constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);
LowerIndex idx_low;
CalculateLowerIndex(idx_low, idx_up); const auto non_zero_diff_pick_up = pick_container_element(non_zero_diff, dims_up);
auto non_zero_diff_pick_low = pick_container_element(non_zero_diff, dims_low);
return idx_low; // if any of upper index diff components is non-zero, then
} // 1) Need to do this transform
// 2) all components of lower index diff will assume to be non-zero and need to be
// computed
const bool idx_diff_up_has_non_zero =
container_reduce(non_zero_diff_pick_up, [](auto a, auto b) { return a or b; }, false);
template <typename UpIdxDiff, typename LowIdx, typename UpIdx> do_transforms(itran) = idx_diff_up_has_non_zero;
__host__ __device__ constexpr auto CalculateLowerIndexDiff(const UpIdxDiff& idx_up_diff,
const LowIdx& idx_low_old,
const UpIdx& idx_up_old) const
{
LowerIndex idx_low_diff;
CalculateLowerIndexDiff(idx_low_diff, idx_up_diff, idx_low_old, idx_up_old); static_for<0, dims_low.Size(), 1>{}(
[&non_zero_diff_pick_low, &idx_diff_up_has_non_zero](auto i) {
non_zero_diff_pick_low(i) = idx_diff_up_has_non_zero;
});
});
return idx_low_diff; return DynamicTensorCoordinateStep<ntransform, ndim_visible>{idx_diff_visible, do_transforms};
} }
__host__ __device__ constexpr index_t CalculateOffset(const UpperIndex& idx_up) const template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
{ __host__ __device__ void move_dynamic_tensor_coordinate(const TensorDesc& tensor_desc,
return low_tensor_desc_.CalculateOffset(CalculateLowerIndex(idx_up)); TensorCoord& coord,
} const TensorCoordStep& coord_step)
{
constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
using HiddenIndex = MultiIndex<ndim_hidden>;
// this is what needs to be calculated
auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();
__host__ __device__ constexpr bool IsUpperIndexValid(const UpperIndex& idx_up) const // initialize visible index diff
// idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
auto idx_diff_hidden_pick_visible =
pick_container_element(idx_diff_hidden, TensorDesc::GetVisibleDimensionIds());
idx_diff_hidden_pick_visible = coord_step.GetVisibleIndexDiff();
// this is what needs to be updated
auto& idx_hidden = coord.GetHiddenIndex();
// update visible index
auto idx_hidden_pick_visible =
pick_container_element(idx_hidden, TensorDesc::GetVisibleDimensionIds());
idx_hidden_pick_visible += coord_step.GetIndexDiff();
// update rest of hidden index
static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
if(coord_step.do_transforms_[itran])
{ {
bool flag = true; const auto& tran = tensor_desc.GetTransforms().At(itran);
constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
constexpr auto dims_up = TensorDesc::GetUpperDimensionIdss().At(itran);
// this const is for ContainerElementPicker, Array itself may not be const
const auto idx_up = pick_container_element(idx_hidden, dims_up);
auto idx_low = pick_container_element(idx_hidden, dims_low);
const auto idx_diff_up = pick_container_element(idx_diff_hidden, dims_up);
auto idx_diff_low = pick_container_element(idx_diff_hidden, dims_low);
static_for<0, NDimUp, 1>{}( tran.CalculateLowerIndexDiff(idx_diff_low, idx_diff_up, idx_low, idx_up);
[&](auto i) { flag = flag && idx_up[i] >= 0 && idx_up[i] < GetLength(i); });
return flag; // update idx_low
idx_low += idx_diff_low;
} }
});
}
__host__ __device__ constexpr bool template <typename TensorDesc, typename TensorCoord>
IsValidUpperIndexMappedToValidLowerIndex(const UpperIndex& idx_up) const __host__ __device__ constexpr bool
{ coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc& tensor_desc,
bool flag = true; const TensorCoord& coord)
{
bool valid = true;
static_for<0, NTransform, 1>{}([&](auto itran) { constexpr index_t ntransform = TensorDesc::GetNumOfTransform();
const auto tran = Transforms{}.At(itran);
// check a indtransformation if it does not always has a valid mapping const auto& idx_hidden = coord.GetHiddenIndex();
constexpr bool is_valid_up_always_mapped_to_valid_low =
decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex();
if constexpr(!is_valid_up_always_mapped_to_valid_low) static_for<ntransform - 1, -1, -1>{}([&tensor_desc, &idx_hidden, &valid](auto itran) {
const auto tran = tensor_desc.GetTransforms().At(itran);
// check validity, only if current transformation does not always has a valid mapping
if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
{ {
const auto up_dims_part = UpDimensionIds{}.At(itran); const auto idx_up =
const auto idx_up_part = pick_container_element(idx_up, up_dims_part); pick_container_element(idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));
flag = flag && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up_part); valid = valid && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
} }
}); });
return flag; return valid;
} }
};
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
const TensorCoord& coord)
{
// check visible index
const auto& idx_visible = coord.GetVisibleIndex();
bool is_visible_index_valid = true;
static_for<0, TensorDesc::GetNumOfDimension(), 1>{}(
[&is_visible_index_valid, &idx_visible, &tensor_desc](auto i) {
is_visible_index_valid =
is_visible_index_valid &&
(idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
});
// check other hidden index
return is_visible_index_valid &&
coordinate_has_valid_offset_assuming_visible_index_is_valid(tensor_desc, coord);
}
} // namespace ck } // namespace ck
#endif #endif
...@@ -6,29 +6,46 @@ ...@@ -6,29 +6,46 @@
namespace ck { namespace ck {
template <typename Lengths, typename Strides> template <index_t N>
__host__ __device__ constexpr auto make_dynamic_native_tensor_descriptor(const Lengths& lengths, __host__ __device__ constexpr auto
const Strides& strides) make_dynamic_native_tensor_descriptor_packed(const MultiIndex<N>& lengths)
{ {
static_assert(Lengths::Size() == Strides::Size(), "wrong! Size not the same");
return DynamicNativeTensorDescriptor<Lengths::Size()>(lengths, strides); const auto transforms = make_tuple(DynamicUnMerge<N>{lengths});
constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
constexpr auto up_dim_hidden_idss =
make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};
const index_t element_space_size =
container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});
return DynamicTensorDescriptor<decltype(transforms),
decltype(low_dim_hidden_idss),
decltype(up_dim_hidden_idss),
decltype(visible_dim_hidden_ids)>{transforms,
element_space_size};
} }
template <typename LowTensorDescriptor, template <index_t N>
typename Transforms,
typename LowDimensionIds,
typename UpDimensionIds>
__host__ __device__ constexpr auto __host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor(const LowTensorDescriptor& low_tensor_desc, make_dynamic_native_tensor_descriptor(const MultiIndex<N>& lengths, const MultiIndex<N>& strides)
const Transforms& transforms,
LowDimensionIds,
UpDimensionIds)
{ {
return DynamicTransformedTensorDescriptor<LowTensorDescriptor, const auto transforms = make_tuple(DynamicEmbed<N>{lengths, strides});
Transforms, constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
LowDimensionIds, constexpr auto up_dim_hidden_idss =
UpDimensionIds>{low_tensor_desc, transforms}; make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};
index_t element_space_size = 1;
static_for<0, N, 1>{}([&](auto i) { element_space_size += (lengths[i] - 1) * strides[i]; });
return DynamicTensorDescriptor<decltype(transforms),
decltype(low_dim_hidden_idss),
decltype(up_dim_hidden_idss),
decltype(visible_dim_hidden_ids)>{transforms,
element_space_size};
} }
} // namespace ck } // namespace ck
......
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V1_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V1_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v1.hpp"
namespace ck {
// Build a native (lengths + strides) dynamic tensor descriptor. Both input
// containers must have the same compile-time size; the rank is deduced from
// Lengths.
template <typename Lengths, typename Strides>
__host__ __device__ constexpr auto make_dynamic_native_tensor_descriptor_v1(const Lengths& lengths,
                                                                            const Strides& strides)
{
    constexpr index_t ndim = Lengths::Size();

    static_assert(ndim == Strides::Size(), "wrong! Size not the same");

    return DynamicNativeTensorDescriptor_v1<ndim>{lengths, strides};
}
// Wrap a lower tensor descriptor with a tuple of multi-index transformations,
// producing a transformed (upper) descriptor. The two dimension-id tuples say
// which lower/upper dimensions each transformation acts on; they are tag
// types and carry no runtime data.
template <typename LowTensorDescriptor,
          typename Transforms,
          typename LowDimensionIds,
          typename UpDimensionIds>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor_v1(const LowTensorDescriptor& low_tensor_desc,
                                       const Transforms& transforms,
                                       LowDimensionIds,
                                       UpDimensionIds)
{
    using TransformedDesc = DynamicTransformedTensorDescriptor_v1<LowTensorDescriptor,
                                                                  Transforms,
                                                                  LowDimensionIds,
                                                                  UpDimensionIds>;

    return TransformedDesc{low_tensor_desc, transforms};
}
} // namespace ck
#endif
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v2.hpp"
namespace ck {
// Make a packed (contiguous) dynamic tensor descriptor: the N visible
// dimensions are un-merged from a single hidden linear dimension (hidden
// id 0), and the element space is the product of all lengths.
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
{
    // hidden dimension ids 1..N are the visible dimensions; id 0 is the offset
    using VisibleHiddenIds = typename arithmetic_sequence_gen<1, N + 1, 1>::type;

    const auto transforms = make_tuple(DynamicUnMerge<N>{lengths});

    constexpr auto low_dim_hidden_idss    = make_tuple(Sequence<0>{});
    constexpr auto up_dim_hidden_idss     = make_tuple(VisibleHiddenIds{});
    constexpr auto visible_dim_hidden_ids = VisibleHiddenIds{};

    // packed layout: element space size is the product of the lengths
    const index_t element_space_size =
        container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});

    return DynamicTensorDescriptor_v2<decltype(transforms),
                                      decltype(low_dim_hidden_idss),
                                      decltype(up_dim_hidden_idss),
                                      decltype(visible_dim_hidden_ids)>{transforms,
                                                                        element_space_size};
}
// Make a strided dynamic tensor descriptor: the N visible dimensions map to
// the hidden linear offset (hidden id 0) through an Embed transformation
// using the given strides.
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const MultiIndex<N>& strides)
{
    // hidden dimension ids 1..N are the visible dimensions; id 0 is the offset
    using VisibleHiddenIds = typename arithmetic_sequence_gen<1, N + 1, 1>::type;

    const auto transforms = make_tuple(DynamicEmbed<N>{lengths, strides});

    constexpr auto low_dim_hidden_idss    = make_tuple(Sequence<0>{});
    constexpr auto up_dim_hidden_idss     = make_tuple(VisibleHiddenIds{});
    constexpr auto visible_dim_hidden_ids = VisibleHiddenIds{};

    // element space size = offset of the last addressable element, plus one
    index_t element_space_size = 1;

    static_for<0, N, 1>{}([&](auto i) { element_space_size += (lengths[i] - 1) * strides[i]; });

    return DynamicTensorDescriptor_v2<decltype(transforms),
                                      decltype(low_dim_hidden_idss),
                                      decltype(up_dim_hidden_idss),
                                      decltype(visible_dim_hidden_ids)>{transforms,
                                                                        element_space_size};
}
} // namespace ck
#endif
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_V1_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_V1_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace ck {
// Descriptor for a "native" dynamic tensor defined directly by lengths and
// strides. Only the rank NDim is a compile-time constant; lengths and strides
// are runtime values.
template <index_t NDim>
struct DynamicNativeTensorDescriptor_v1
{
    using Index = MultiIndex<NDim>;

    const Index lengths_;
    const Index strides_;

    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1(const Index& lengths,
                                                                            const Index& strides)
        : lengths_{lengths}, strides_{strides}
    {
    }

    // default state: all lengths and strides are zero
    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1()
        : lengths_{make_zero_multi_index<NDim>()}, strides_{make_zero_multi_index<NDim>()}
    {
    }

    __host__ __device__ static constexpr index_t GetNumOfDimension() { return NDim; }

    __host__ __device__ constexpr auto GetLengths() const { return lengths_; }

    __host__ __device__ constexpr auto GetStrides() const { return strides_; }

    template <index_t IDim>
    __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
    {
        return lengths_[Number<IDim>{}];
    }

    template <index_t IDim>
    __host__ __device__ constexpr index_t GetStride(Number<IDim>) const
    {
        return strides_[Number<IDim>{}];
    }

    // number of elements: product of all lengths
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
    }

    // size of the addressable span: offset of the last element, plus one
    __host__ __device__ constexpr index_t GetElementSpace() const
    {
        index_t element_space = 1;

        static_for<0, NDim, 1>{}(
            [&](auto idim) { element_space += (GetLength(idim) - 1) * GetStride(idim); });

        return element_space;
    }

    // linear offset of a multi-index: inner product with the strides
    template <typename Idx>
    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
    {
        index_t linear_offset = 0;

        static_for<0, NDim, 1>{}(
            [&](auto idim) { linear_offset += idx[idim] * GetStride(idim); });

        return linear_offset;
    }

    // the offset is linear in the index, so an index difference maps to an
    // offset difference by the same formula
    template <typename IdxDiff>
    __host__ __device__ constexpr index_t CalculateOffsetDiff(const IdxDiff& idx_diff) const
    {
        return CalculateOffset(idx_diff);
    }

    // true iff every index component lies in [0, length)
    template <typename Idx>
    __host__ __device__ constexpr bool IsUpperIndexValid(const Idx& idx) const
    {
        bool is_valid = true;

        static_for<0, NDim, 1>{}([&](auto idim) {
            is_valid = is_valid && (idx[idim] >= 0 && idx[idim] < GetLength(idim));
        });

        return is_valid;
    }
};
template <typename LowTensorDescriptor, // DynamicNativeTensorDescriptor_v1 or
// DynamicTransformedTensorDescriptor_v1
typename Transforms, // Tuple<MultIndexTransforms...>
typename LowDimensionIds, // Tuple<Sequence<...>>
typename UpDimensionIds> // Tuple<Sequence<...>>
struct DynamicTransformedTensorDescriptor_v1
{
using LowerDesc = LowTensorDescriptor;
using UpperDesc = DynamicTransformedTensorDescriptor_v1;
static constexpr index_t NTransform = Transforms::Size();
const LowerDesc low_tensor_desc_;
const Transforms transforms_;
__host__ __device__ static constexpr index_t GetNumOfLowerDimension()
{
return LowerDesc::GetNumOfDimension();
}
__host__ __device__ static constexpr index_t GetNumOfUpperDimension()
{
index_t ndim_up = 0;
static_for<0, NTransform, 1>{}([&](auto i) constexpr {
constexpr auto tmp = UpDimensionIds{}.At(i);
ndim_up += decltype(tmp)::Size();
});
return ndim_up;
}
static constexpr index_t NDimUp = GetNumOfUpperDimension();
static constexpr index_t NDimLow = GetNumOfLowerDimension();
using UpperIndex = MultiIndex<NDimUp>;
using LowerIndex = MultiIndex<NDimLow>;
struct lambda_merge_sequences
{
template <typename... Xs>
__host__ __device__ constexpr auto operator()(Xs... xs) const
{
return merge_sequences(xs...);
}
};
struct lambda_merge_arrays
{
template <typename... Xs>
__host__ __device__ constexpr auto operator()(Xs... xs) const
{
return container_cat(xs...);
}
};
__host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor_v1(
const LowerDesc& low_tensor_desc, const Transforms& transforms)
: low_tensor_desc_{low_tensor_desc}, transforms_{transforms}
{
static_assert(NTransform == Transforms::Size() && NTransform == LowDimensionIds::Size() &&
NTransform == UpDimensionIds::Size(),
"wrong! # of transformations not the same");
// sanity check:
// LowDimensionIds should include all low-dimensions,
// UpDimensionIds should include all up-dimensions
using unsorted_up_dimension_ids =
decltype(unpack(lambda_merge_sequences{}, UpDimensionIds{}));
using sorted_up_dimension_ids =
typename sequence_sort<unsorted_up_dimension_ids, math::less<index_t>>::type;
static_assert(sorted_up_dimension_ids::Size() == NDimUp &&
is_valid_sequence_map<sorted_up_dimension_ids>{},
"wrong! UpDimensionIds is not configured correctly");
using unsorted_low_dimension_ids =
decltype(unpack(lambda_merge_sequences{}, LowDimensionIds{}));
using sorted_low_dimension_ids =
typename sequence_sort<unsorted_low_dimension_ids, math::less<index_t>>::type;
static_assert(sorted_low_dimension_ids::Size() == NDimLow &&
is_valid_sequence_map<sorted_low_dimension_ids>{},
"wrong! LowDimensionIds is not configured correctly");
// TODO: sanity check: while a up-dimension could be associated with
// multille
// transformation, a low-dimension should be associated with only one
// transformation
// TODO: sanity-check: GetLowerLengths of each transform should be
// consistent with lengths
// of lower-tensor-descriptor
}
__host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor_v1()
: low_tensor_desc_{}, transforms_{}
{
}
__host__ __device__ static constexpr index_t GetNumOfDimension()
{
return GetNumOfUpperDimension();
}
__host__ __device__ constexpr auto GetUpperLengths() const
{
// sort upper-dimension-ids
constexpr auto unsorted_up_dimension_ids =
unpack(lambda_merge_sequences{}, UpDimensionIds{});
using sort_up_dimension_ids = sequence_unique_sort<decltype(unsorted_up_dimension_ids),
math::less<index_t>,
math::equal<index_t>>;
constexpr auto sorted2unsorted_map = typename sort_up_dimension_ids::sorted2unsorted_map{};
// sort upper-lengths
const auto tuple_of_up_lengths =
transform_tuples([](const auto& tran) constexpr { return tran.GetUpperLengths(); },
transforms_);
const auto unsorted_up_lengths = unpack(lambda_merge_arrays{}, tuple_of_up_lengths);
const auto sorted_up_lengths =
container_reorder_given_new2old(unsorted_up_lengths, sorted2unsorted_map);
return sorted_up_lengths;
}
__host__ __device__ constexpr auto GetLengths() const { return GetUpperLengths(); }
template <index_t IDim>
__host__ __device__ constexpr index_t GetLength(Number<IDim>) const
{
return GetLengths()[Number<IDim>{}];
}
__host__ __device__ constexpr index_t GetElementSize() const
{
return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
}
__host__ __device__ constexpr index_t GetElementSpace() const
{
return low_tensor_desc_.GetElementSpace();
}
__host__ __device__ constexpr auto GetLowerTensorDescriptor() const { return low_tensor_desc_; }
template <typename LowIdx, typename UpIdx>
__host__ __device__ void CalculateLowerIndex(LowIdx& idx_low, const UpIdx& idx_up) const
{
static_for<0, NTransform, 1>{}([&](auto itran) constexpr {
const auto tran = transforms_.At(itran);
const auto idx_up_part = pick_container_element(idx_up, UpDimensionIds{}.At(itran));
auto idx_low_part = pick_container_element(idx_low, LowDimensionIds{}.At(itran));
tran.CalculateLowerIndex(idx_low_part, idx_up_part);
});
}
// Compute the lower index *difference* from an upper index difference, in place.
// The old lower/upper indices are also passed because some transforms'
// difference mapping depends on the current position (e.g. carry/borrow).
template <typename LowIdxDiff, typename UpIdxDiff, typename LowIdx, typename UpIdx>
__host__ __device__ void CalculateLowerIndexDiff(LowIdxDiff& idx_low_diff,
                                                 const UpIdxDiff& idx_up_diff,
                                                 const LowIdx& idx_low_old,
                                                 const UpIdx& idx_up_old) const
{
    static_for<0, NTransform, 1>{}([&](auto itran) {
        const auto tran = transforms_.At(itran);

        // per-transform views into the caller's containers
        const auto idx_up_diff_part =
            pick_container_element(idx_up_diff, UpDimensionIds{}.At(itran));

        const auto idx_up_old_part =
            pick_container_element(idx_up_old, UpDimensionIds{}.At(itran));

        const auto idx_low_old_part =
            pick_container_element(idx_low_old, LowDimensionIds{}.At(itran));

        // writable view: results land directly in the caller's idx_low_diff
        auto idx_low_diff_part =
            pick_container_element(idx_low_diff, LowDimensionIds{}.At(itran));

        tran.CalculateLowerIndexDiff(
            idx_low_diff_part, idx_up_diff_part, idx_low_old_part, idx_up_old_part);
    });
}
// Value-returning convenience overload: builds a LowerIndex, fills it via the
// in-place CalculateLowerIndex, and returns it.
template <typename UpIdx>
__host__ __device__ constexpr auto CalculateLowerIndex(const UpIdx& idx_up) const
{
    LowerIndex idx_low_result;

    CalculateLowerIndex(idx_low_result, idx_up);

    return idx_low_result;
}
// Value-returning convenience overload: builds a LowerIndex diff, fills it via
// the in-place CalculateLowerIndexDiff, and returns it.
template <typename UpIdxDiff, typename LowIdx, typename UpIdx>
__host__ __device__ constexpr auto CalculateLowerIndexDiff(const UpIdxDiff& idx_up_diff,
                                                           const LowIdx& idx_low_old,
                                                           const UpIdx& idx_up_old) const
{
    LowerIndex idx_low_diff_result;

    CalculateLowerIndexDiff(idx_low_diff_result, idx_up_diff, idx_low_old, idx_up_old);

    return idx_low_diff_result;
}
// Flat memory offset of an upper index: map to the lower index space, then let
// the lower descriptor compute the offset.
__host__ __device__ constexpr index_t CalculateOffset(const UpperIndex& idx_up) const
{
    return low_tensor_desc_.CalculateOffset(CalculateLowerIndex(idx_up));
}
// True iff every component of idx_up lies in [0, GetLength(i)).
__host__ __device__ constexpr bool IsUpperIndexValid(const UpperIndex& idx_up) const
{
    bool flag = true;

    static_for<0, NDimUp, 1>{}(
        [&](auto i) { flag = flag && idx_up[i] >= 0 && idx_up[i] < GetLength(i); });

    return flag;
}
// Check whether a (valid) upper index also maps to a valid lower index through
// every transform. Transforms that statically guarantee a valid mapping are
// skipped at compile time; only the others (e.g. padding-like transforms) are
// queried at run time.
__host__ __device__ constexpr bool
IsValidUpperIndexMappedToValidLowerIndex(const UpperIndex& idx_up) const
{
    bool flag = true;

    static_for<0, NTransform, 1>{}([&](auto itran) {
        // BUGFIX: use the member transforms_ (which carries the runtime
        // lengths) instead of a default-constructed Transforms{}; every other
        // method of this class (CalculateLowerIndex, CalculateLowerIndexDiff)
        // already queries transforms_, and a default-constructed transform
        // would validate against wrong lengths.
        const auto tran = transforms_.At(itran);

        // query this transform only if it does not always have a valid mapping
        constexpr bool is_valid_up_always_mapped_to_valid_low =
            decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex();

        if constexpr(!is_valid_up_always_mapped_to_valid_low)
        {
            const auto up_dims_part = UpDimensionIds{}.At(itran);
            const auto idx_up_part  = pick_container_element(idx_up, up_dims_part);

            flag = flag && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up_part);
        }
    });

    return flag;
}
};
} // namespace ck
#endif
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_V2_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace ck {
// forward declarations
template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicTensorCoordinate_v2;

template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep_v2;

// construct a coordinate from a visible (user-facing) index
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_visible);

// precompute, for a given visible index difference, which transforms need to run
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& idx_diff_visible);

// advance a coordinate by a precomputed step
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
                                                           TensorCoord& coord,
                                                           const TensorCoordStep& coord_step);

// validity checks on the offset a coordinate points at
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool
coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc& tensor_desc,
                                                            const TensorCoord& coord);

template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord);
// Describes a tensor as a chain of index transformations over a flat memory
// space. Hidden dimension 0 is the flat offset; transforms consume "lower"
// hidden dimensions and produce "upper" hidden dimensions; a subset of hidden
// dimensions is exposed to the user as the "visible" dimensions.
//   Transforms           : Tuple<transforms...>
//   LowerDimensionIdss   : Tuple<Sequence<...>, ...> hidden ids read by each transform
//   UpperDimensionIdss   : Tuple<Sequence<...>, ...> hidden ids produced by each transform
//   VisibleDimensionIds  : Sequence<...> hidden ids exposed to the user
template <typename Transforms,
          typename LowerDimensionIdss,
          typename UpperDimensionIdss,
          typename VisibleDimensionIds>
struct DynamicTensorDescriptor_v2
{
    // private:
    __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }

    __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
    {
        return VisibleDimensionIds::Size();
    }

    // Number of distinct hidden dimension ids referenced by any transform
    // (union of all lower and upper id sequences, deduplicated).
    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
        constexpr auto all_low_dim_ids =
            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
                   LowerDimensionIdss{});

        constexpr auto all_up_dim_ids =
            unpack([](auto&&... xs) constexpr { return merge_sequences(xs...); },
                   UpperDimensionIdss{});

        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);

        using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
                                                                      math::less<index_t>,
                                                                      math::equal<index_t>>::type;

        return unique_sort_all_dim_ids::Size();
    }

    constexpr static index_t ntransform_   = GetNumOfTransform();
    constexpr static index_t ndim_visible_ = GetNumOfVisibleDimension();
    constexpr static index_t ndim_hidden_  = GetNumOfHiddenDimension();

    using VisibleIndex   = MultiIndex<ndim_visible_>;
    using HiddenIndex    = MultiIndex<ndim_hidden_>;
    using Coordinate     = DynamicTensorCoordinate_v2<ndim_hidden_, VisibleDimensionIds>;
    using CoordinateStep = DynamicTensorCoordinateStep_v2<ntransform_, ndim_visible_>;

    // public:
    // element_space_size is the size of the underlying flat memory, stored as
    // the length of hidden dimension 0.
    __host__ __device__ explicit constexpr DynamicTensorDescriptor_v2(const Transforms& transforms,
                                                                      index_t element_space_size)
        : transforms_{transforms},
          hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)},
          visible_lengths_{hidden_lengths_}
    {
        static_assert(Transforms::Size() == ntransform_ &&
                          LowerDimensionIdss::Size() == ntransform_ &&
                          UpperDimensionIdss::Size() == ntransform_,
                      "wrong! inconsistent # of transformations");

        // TODO check dependency of dimensions is valid
    }

    __host__ __device__ explicit constexpr DynamicTensorDescriptor_v2()
        : DynamicTensorDescriptor_v2(Transforms{}, index_t{0})
    {
    }

    // "number of dimensions" from the user's point of view == visible dims
    __host__ __device__ static constexpr index_t GetNumOfDimension()
    {
        return GetNumOfVisibleDimension();
    }

    template <index_t IDim>
    __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
    {
        return visible_lengths_[Number<IDim>{}];
    }

    __host__ __device__ constexpr const auto& GetLengths() const { return visible_lengths_; }

    // maybe this result should be saved as a member variable
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
    }

    // hidden dimension 0 holds the flat element space size
    __host__ __device__ constexpr index_t GetElementSpaceSize() const
    {
        return hidden_lengths_[Number<0>{}];
    }

    // Compute the flat offset of a visible index by running all transforms
    // from last to first, filling in the hidden index as we go; the offset is
    // the final value of hidden dimension 0.
    template <typename Idx>
    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
    {
        static_assert(Idx::Size() == GetNumOfDimension(), "wrong! inconsistent # of dimension");

#if 0 // debug
        return make_dynamic_tensor_coordinate_v2(*this, idx).GetOffset();
#else
        constexpr index_t ntransform   = GetNumOfTransform();
        constexpr index_t ndim_hidden  = GetNumOfHiddenDimension();
        constexpr index_t ndim_visible = GetNumOfVisibleDimension();

        constexpr auto visible_dim_ids = GetVisibleDimensionIds();

        MultiIndex<ndim_hidden> idx_hidden;

        // initialize visible index (writes through the picker into idx_hidden)
        auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);
        idx_hidden_pick_visible     = idx;

        // calculate hidden index, last transform first
        static_for<ntransform - 1, -1, -1>{}([this, &idx_hidden](auto itran) {
            const auto& tran        = this->GetTransforms().At(itran);
            constexpr auto dims_low = GetLowerDimensionIdss().At(itran);
            constexpr auto dims_up  = GetUpperDimensionIdss().At(itran);

            const auto idx_up = pick_container_element(idx_hidden, dims_up);
            auto idx_low      = pick_container_element(idx_hidden, dims_low);

            tran.CalculateLowerIndex(idx_low, idx_up);
        });

        return idx_hidden[Number<0>{}];
#endif
    }

    // private:
    __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }

    __host__ __device__ static constexpr auto GetLowerDimensionIdss()
    {
        return LowerDimensionIdss{};
    }

    __host__ __device__ static constexpr auto GetUpperDimensionIdss()
    {
        return UpperDimensionIdss{};
    }

    __host__ __device__ static constexpr auto GetVisibleDimensionIds()
    {
        return VisibleDimensionIds{};
    }

    // Build the per-hidden-dimension length array: hidden dim 0 holds the flat
    // element space size; every transform writes its upper lengths into its
    // upper hidden dimensions. Lower-only hidden dims keep the zero init.
    __host__ __device__ static constexpr auto InitializeHiddenLengths(const Transforms& transforms,
                                                                      index_t element_space_size)
    {
        // zero initialization
        HiddenIndex hidden_lengths = make_zero_multi_index<ndim_hidden_>();

        // this is the original tensor element space size
        hidden_lengths(Number<0>{}) = element_space_size;

        // lengths for all other hidden dimensions
        static_for<0, ntransform_, 1>{}([&transforms, &hidden_lengths](auto itran) {
            const auto& tran = transforms.At(itran);

            constexpr auto up_dim_ids = UpperDimensionIdss{}.At(itran);

            // hidden_lengths_pick_up contains a reference to hidden_lengths
            auto hidden_lengths_pick_up = pick_container_element(hidden_lengths, up_dim_ids);

            hidden_lengths_pick_up = tran.GetUpperLengths();
        });

        return hidden_lengths;
    }

    // private member variables
    const Transforms transforms_;
    // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
    // variable lengths_) to save space on stack?
    const HiddenIndex hidden_lengths_;
    // visible_lengths_ contains a reference to hidden_lengths_
    const ContainerElementPicker<const HiddenIndex, VisibleDimensionIds> visible_lengths_;

#if 0
    // friend class
    friend Coordinate;
    friend CoordinateStep;

    // friend function to transform tensor descriptor
    template <typename OldTensorDescriptor,
              typename NewTransforms,
              typename NewLowerDimensionOldVisibleIdss,
              typename NewUpperDimensionNewVisibleIdss>
    __host__ __device__ friend constexpr auto
    transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& /* old_tensor_desc */,
                                           const NewTransforms& /* new_transforms */,
                                           NewLowerDimensionOldVisibleIdss,
                                           NewUpperDimensionNewVisibleIdss);

    // friend functions for making and moving tensor coordinate
    template <typename VisibleIndex>
    __host__ __device__ friend constexpr Coordinate
    make_dynamic_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
                                      const VisibleIndex& /* idx_visible */);

    template <typename VisibleIndex>
    __host__ __device__ friend constexpr CoordinateStep
    make_dynamic_tensor_coordinate_step_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
                                           const VisibleIndex& /* idx_diff_visible */);

    __host__ __device__ friend void
    move_dynamic_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
                                      Coordinate& /* coord */,
                                      const CoordinateStep& /* coord_step */);

    // friend functions for valid offset check
    __host__ __device__ friend constexpr bool
    coordinate_has_valid_offset_assuming_visible_index_is_valid(
        const DynamicTensorDescriptor_v2& tensor_desc, const Coordinate& coord);

    __host__ __device__ friend constexpr bool
    coordinate_has_valid_offset(const DynamicTensorDescriptor_v2& tensor_desc,
                                const Coordinate& coord);
#endif
};
// A tensor coordinate: stores the full hidden index; the visible part is
// exposed through a picker that references idx_hidden_. The flat memory
// offset is hidden dimension 0.
template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicTensorCoordinate_v2
{
    // private:
    static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();

    using HiddenIndex  = MultiIndex<NDimHidden>;
    using VisibleIndex = MultiIndex<ndim_visible_>;

    // public:
    __host__ __device__ explicit constexpr DynamicTensorCoordinate_v2(const HiddenIndex& idx_hidden)
        : idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_}
    {
    }

    __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }

    // flat memory offset == hidden dimension 0
    __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }

    // private:
    __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }

    __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }

    __host__ __device__ constexpr const auto& GetVisibleIndex() const { return idx_visible_; }

    __host__ __device__ auto& GetVisibleIndex() { return idx_visible_; }

    // private member variables
    HiddenIndex idx_hidden_;
    // idx_visible_ contains a reference to idx_hidden_
    // NOTE(review): a default copy/move of this struct would presumably leave
    // idx_visible_ referring to the *source* object's idx_hidden_ -- verify
    // coordinates are never copied, or that ContainerElementPicker handles this.
    ContainerElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;

#if 0
    // friend functions for making and updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinate_v2
    make_dynamic_tensor_coordinate_v2(const TensorDesc& /* tensor_desc */,
                                      const VisibleIndex& /* idx_visible */);

    template <typename TensorDesc, typename TensorCoordStep>
    __host__ __device__ friend void move_dynamic_tensor_coordinate_v2(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate_v2& /* coord */,
        const TensorCoordStep& /* coord_step */);
#endif
};
// A precomputed coordinate step: the visible index difference plus, for each
// transform, a flag saying whether that transform must run when the step is
// applied (true iff any of its upper-index diff components can be non-zero).
template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep_v2
{
    // private:
    using VisibleIndex = MultiIndex<NDimVisible>;

    // public:
    __host__ __device__ explicit constexpr DynamicTensorCoordinateStep_v2(
        const VisibleIndex& idx_diff_visible, const Array<bool, NTransform>& do_transforms)
        : idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
    {
    }

    __host__ __device__ constexpr const auto& GetIndexDiff() const { return GetVisibleIndexDiff(); }

    // private:
    __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
    {
        return idx_diff_visible_;
    }

    // private:
    const VisibleIndex idx_diff_visible_;
    // do_transforms_[itran] == true iff transform itran must run for this step
    const Array<bool, NTransform> do_transforms_;

#if 0
    // friend functions for updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinateStep_v2
    make_dynamic_tensor_coordinate_step_v2(const TensorDesc& /* tensor_desc */,
                                           const VisibleIndex& /* idx_visible */);

    template <typename TensorDesc, index_t NDimHidden, typename VisibleDimensionIds>
    __host__ __device__ friend void move_dynamic_tensor_coordinate_v2(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate_v2<NDimHidden, VisibleDimensionIds>& /* coord */,
        const DynamicTensorCoordinateStep_v2& /* coord_step */);
#endif
};
// TODO: How to fix this? A struct is used instead of a lambda because a lambda
// has no constructor, and it is placed outside the scope where it is used
// (transform_dynamic_tensor_descriptor_v2) because a template cannot be
// defined inside a function template.
// Functor: number of upper dimensions of the I-th new transform.
template <typename NewTransforms>
struct lambda_get_up_dim_num
{
    template <typename I>
    __host__ __device__ constexpr auto operator()(I) const
    {
        using Tran = remove_reference_t<decltype(NewTransforms{}.At(I{}))>;
        return Number<Tran::GetNumOfUpperDimension()>{};
    }
};
// Stack new transforms on top of an existing descriptor, producing a new
// descriptor whose transform chain is old chain + new transforms. The new
// transforms' lower dims are given as OLD VISIBLE ids and translated to hidden
// ids; their upper dims become freshly-allocated hidden ids, which form the
// new visible dimensions (reordered per NewUpperDimensionNewVisibleIdss).
template <typename OldTensorDescriptor,
          typename NewTransforms,
          typename NewLowerDimensionOldVisibleIdss,
          typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_desc,
                                       const NewTransforms& new_transforms,
                                       NewLowerDimensionOldVisibleIdss,
                                       NewUpperDimensionNewVisibleIdss)
{
    // lower dimension's hidden idss
    // convert lower dimension visible idss (tuple of sequences) to hidden idss (tuple of
    // sequences)
    constexpr auto low_dim_hidden_idss = transform_tuples(
        // convert lower dimension visible ids (a sequence) to hidden ids (a sequence)
        [](auto low_dim_visible_ids) constexpr {
            return transform_sequences(
                // convert lower dimension visible id to hidden id
                [](auto low_dim_visible_id) constexpr {
                    return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
                },
                low_dim_visible_ids);
        },
        NewLowerDimensionOldVisibleIdss{});

    constexpr index_t num_new_transform = NewTransforms::Size();

    // upper dimension's hidden idss: allocate a contiguous range of fresh
    // hidden ids per new transform, starting just past the old hidden dims
    constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();

    constexpr auto up_dim_numbers =
        generate_sequence(lambda_get_up_dim_num<NewTransforms>{}, Number<num_new_transform>{});

    // prefix sums give each transform's offset into the fresh-id range
    constexpr auto up_dim_numbers_scan = merge_sequences(
        Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));

    constexpr auto up_dim_hidden_idss =
        generate_tuple([ old_hidden_dim_number, up_dim_numbers_scan ](auto i) constexpr {
            return
                typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
                                                 old_hidden_dim_number + up_dim_numbers_scan[i + 1],
                                                 1>::type{};
        },
        Number<num_new_transform>{});

    // new visible dimension's hidden ids, reordered into the caller's order
    constexpr auto unordered_new_visible_dim_hidden_ids =
        unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);

    constexpr auto new_visible_dim_unordered2ordered = unpack(
        [](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});

    constexpr auto new_visible_dim_hidden_ids =
        unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);

    // put everything together: concatenate transforms and id mappings
    const auto all_transforms = container_cat(old_tensor_desc.GetTransforms(), new_transforms);

    constexpr auto all_low_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);

    constexpr auto all_up_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);

    return DynamicTensorDescriptor_v2<decltype(all_transforms),
                                      decltype(all_low_dim_hidden_idss),
                                      decltype(all_up_dim_hidden_idss),
                                      decltype(new_visible_dim_hidden_ids)>{
        all_transforms, old_tensor_desc.GetElementSpaceSize()};
}
// Build a coordinate from a visible index: seed the hidden index with the
// visible components, then run the transforms from last to first so every
// hidden component (including the flat offset in hidden dim 0) is filled in.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    MultiIndex<ndim_hidden> idx_hidden;

    // initialize visible index (writes through the picker into idx_hidden)
    auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);
    idx_hidden_pick_visible     = idx_visible;

    // calculate hidden index, last transform first
    // (itran_p1 runs ntransform..1 so the loop bound stays non-negative)
    static_for<ntransform, 0, -1>{}([&tensor_desc, &idx_hidden](auto itran_p1) {
        auto itran = itran_p1 - Number<1>{};

        const auto& tran        = tensor_desc.GetTransforms().At(itran);
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto idx_up = pick_container_element(idx_hidden, dims_up);
        auto idx_low      = pick_container_element(idx_hidden, dims_low);

        tran.CalculateLowerIndex(idx_low, idx_up);
    });

    // better to use std::move?
    return DynamicTensorCoordinate_v2<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
}
// Build a coordinate step for a visible index difference. Propagates a
// "non-zero diff" flag from the visible dims down through the transform chain
// to decide, per transform, whether it must run when this step is applied.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& idx_diff_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    Array<bool, ntransform> do_transforms{false};

    // per-hidden-dimension flag: can this component's diff be non-zero?
    Array<bool, ndim_hidden> non_zero_diff{false};

    // seed from the visible diff components
    auto non_zero_diff_pick_visible = pick_container_element(non_zero_diff, visible_dim_ids);

    static_for<0, ndim_visible, 1>{}([&non_zero_diff_pick_visible, &idx_diff_visible](auto i) {
        non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0);
    });

    // propagate downward, last transform first
    static_for<ntransform - 1, -1, -1>{}([&do_transforms, &non_zero_diff](auto itran) {
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto non_zero_diff_pick_up = pick_container_element(non_zero_diff, dims_up);
        auto non_zero_diff_pick_low      = pick_container_element(non_zero_diff, dims_low);

        // if any of upper index diff components is non-zero, then
        //   1) Need to do this transform
        //   2) all components of lower index diff will assume to be non-zero and need to be
        //      computed
        const bool idx_diff_up_has_non_zero =
            container_reduce(non_zero_diff_pick_up, [](auto a, auto b) { return a or b; }, false);

        do_transforms(itran) = idx_diff_up_has_non_zero;

        static_for<0, dims_low.Size(), 1>{}(
            [&non_zero_diff_pick_low, &idx_diff_up_has_non_zero](auto i) {
                non_zero_diff_pick_low(i) = idx_diff_up_has_non_zero;
            });
    });

    return DynamicTensorCoordinateStep_v2<ntransform, ndim_visible>{idx_diff_visible,
                                                                    do_transforms};
}
// Advance a coordinate by a precomputed step: update the visible components,
// then re-run (last to first) only the transforms the step marked in
// do_transforms_, propagating index diffs down to the flat offset.
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
                                                           TensorCoord& coord,
                                                           const TensorCoordStep& coord_step)
{
    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();

    using HiddenIndex = MultiIndex<ndim_hidden>;

    // this is what needs to be calculated
    auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();

    // initialize visible index diff
    // idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
    auto idx_diff_hidden_pick_visible =
        pick_container_element(idx_diff_hidden, TensorDesc::GetVisibleDimensionIds());

    idx_diff_hidden_pick_visible = coord_step.GetVisibleIndexDiff();

    // this is what needs to be updated
    auto& idx_hidden = coord.GetHiddenIndex();

    // update visible index
    auto idx_hidden_pick_visible =
        pick_container_element(idx_hidden, TensorDesc::GetVisibleDimensionIds());

    idx_hidden_pick_visible += coord_step.GetIndexDiff();

    // update rest of hidden index, skipping transforms with all-zero upper diff
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
        if(coord_step.do_transforms_[itran])
        {
            const auto& tran        = tensor_desc.GetTransforms().At(itran);
            constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
            constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

            // this const is for ContainerElementPicker, Array itself may not be const
            const auto idx_up = pick_container_element(idx_hidden, dims_up);
            auto idx_low      = pick_container_element(idx_hidden, dims_low);

            const auto idx_diff_up = pick_container_element(idx_diff_hidden, dims_up);
            auto idx_diff_low      = pick_container_element(idx_diff_hidden, dims_low);

            tran.CalculateLowerIndexDiff(idx_diff_low, idx_diff_up, idx_low, idx_up);

            // update idx_low
            idx_low += idx_diff_low;
        }
    });
}
// Check that the coordinate's hidden index is valid through every transform,
// assuming the visible index itself is already known to be in range. Only
// transforms that can map a valid upper index to an invalid lower index
// (e.g. padding-like transforms) are queried; others are skipped at compile time.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool
coordinate_has_valid_offset_assuming_visible_index_is_valid(const TensorDesc& tensor_desc,
                                                            const TensorCoord& coord)
{
    bool valid = true;

    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();

    const auto& idx_hidden = coord.GetHiddenIndex();

    static_for<ntransform - 1, -1, -1>{}([&tensor_desc, &idx_hidden, &valid](auto itran) {
        const auto tran = tensor_desc.GetTransforms().At(itran);

        // check validity, only if current transformation does not always have a valid mapping
        if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
        {
            const auto idx_up =
                pick_container_element(idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));

            valid = valid && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
        }
    });

    return valid;
}
// Full validity check: visible index in range AND every transform maps it to
// a valid lower index.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord)
{
    // check visible index against [0, length) per dimension
    const auto& idx_visible = coord.GetVisibleIndex();

    bool is_visible_index_valid = true;

    static_for<0, TensorDesc::GetNumOfDimension(), 1>{}(
        [&is_visible_index_valid, &idx_visible, &tensor_desc](auto i) {
            is_visible_index_valid =
                is_visible_index_valid &&
                (idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
        });

    // check other hidden index
    return is_visible_index_valid &&
           coordinate_has_valid_offset_assuming_visible_index_is_valid(tensor_desc, coord);
}
} // namespace ck
#endif
...@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v1(InDesc, ...@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v1(InDesc,
using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type; using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
const auto in_nchw_desc = make_dynamic_native_tensor_descriptor( const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v1(
to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides())); to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides()));
const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor( const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v1(
to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides())); to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides()));
const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor( const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v1(
to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides())); to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides()));
const auto conv_strides = to_multi_index(ConvStrides{}); const auto conv_strides = to_multi_index(ConvStrides{});
...@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc, ...@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
const auto in_gemmk_gemmn_global_desc = tensor_descs.At(Number<0>{}); const auto in_gemmk_gemmn_global_desc = tensor_descs.At(Number<0>{});
auto in_gemmk_gemmn_coord = auto in_gemmk_gemmn_coord =
make_dynamic_tensor_coordinate<2>(in_gemmk_gemmn_global_desc, make_multi_index(0, 0)); make_dynamic_tensor_coordinate(in_gemmk_gemmn_global_desc, make_multi_index(0, 0));
for(index_t iter = 0; iter < 10; ++iter) for(index_t iter = 0; iter < 10; ++iter)
{ {
...@@ -112,9 +112,9 @@ void device_dummy_dynamic_transform_v1(InDesc, ...@@ -112,9 +112,9 @@ void device_dummy_dynamic_transform_v1(InDesc,
index_t* const, index_t* const,
float* const, float* const,
float* const, float* const,
const DynamicNativeTensorDescriptor<4>, const DynamicNativeTensorDescriptor_v1<4>,
const DynamicNativeTensorDescriptor<4>, const DynamicNativeTensorDescriptor_v1<4>,
const DynamicNativeTensorDescriptor<4>, const DynamicNativeTensorDescriptor_v1<4>,
const MultiIndex<2>, const MultiIndex<2>,
const MultiIndex<2>, const MultiIndex<2>,
const MultiIndex<2>, const MultiIndex<2>,
......
...@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type; using TDevice = typename conditional<is_same<half_float::half, T>::value, half_t, T>::type;
const auto in_nchw_desc = make_dynamic_native_tensor_descriptor_v2<4>( const auto in_nchw_desc = make_dynamic_native_tensor_descriptor<4>(
to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides())); to_multi_index(InDesc::GetLengths()), to_multi_index(InDesc::GetStrides()));
const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor_v2<4>( const auto wei_kcyx_desc = make_dynamic_native_tensor_descriptor<4>(
to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides())); to_multi_index(WeiDesc::GetLengths()), to_multi_index(WeiDesc::GetStrides()));
const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor_v2<4>( const auto out_nkhw_desc = make_dynamic_native_tensor_descriptor<4>(
to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides())); to_multi_index(OutDesc::GetLengths()), to_multi_index(OutDesc::GetStrides()));
const auto conv_strides = to_multi_index(ConvStrides{}); const auto conv_strides = to_multi_index(ConvStrides{});
...@@ -52,12 +52,11 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -52,12 +52,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
// test on cpu // test on cpu
{ {
auto in_gemmk_gemmn_gemmkpack_coord = make_dynamic_tensor_coordinate_v2( auto in_gemmk_gemmn_gemmkpack_coord = make_dynamic_tensor_coordinate(
in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 0)); in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 0));
const auto in_gemmk_gemmn_gemmkpack_coord_step_0_0_1 = const auto in_gemmk_gemmn_gemmkpack_coord_step_0_0_1 = make_dynamic_tensor_coordinate_step(
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_gemmkpack_global_desc, in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 1));
make_multi_index(0, 0, 1));
print_array_v2("do_tansforms 0 0 1: ", print_array_v2("do_tansforms 0 0 1: ",
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1.do_transforms_); in_gemmk_gemmn_gemmkpack_coord_step_0_0_1.do_transforms_);
...@@ -70,19 +69,18 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -70,19 +69,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf("offset: %d\n", in_gemmk_gemmn_gemmkpack_coord.GetOffset()); printf("offset: %d\n", in_gemmk_gemmn_gemmkpack_coord.GetOffset());
printf("\n"); printf("\n");
move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, move_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc,
in_gemmk_gemmn_gemmkpack_coord, in_gemmk_gemmn_gemmkpack_coord,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1); in_gemmk_gemmn_gemmkpack_coord_step_0_0_1);
} }
} }
{ {
auto in_gemmk_gemmn_gemmkpack_coord = make_dynamic_tensor_coordinate_v2( auto in_gemmk_gemmn_gemmkpack_coord = make_dynamic_tensor_coordinate(
in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 0)); in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 0));
const auto in_gemmk_gemmn_gemmkpack_coord_step_0_1_0 = const auto in_gemmk_gemmn_gemmkpack_coord_step_0_1_0 = make_dynamic_tensor_coordinate_step(
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_gemmkpack_global_desc, in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 1, 0));
make_multi_index(0, 1, 0));
print_array_v2("do_tansforms 0 1 0: ", print_array_v2("do_tansforms 0 1 0: ",
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0.do_transforms_); in_gemmk_gemmn_gemmkpack_coord_step_0_1_0.do_transforms_);
...@@ -95,19 +93,18 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -95,19 +93,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf("offset: %d\n", in_gemmk_gemmn_gemmkpack_coord.GetOffset()); printf("offset: %d\n", in_gemmk_gemmn_gemmkpack_coord.GetOffset());
printf("\n"); printf("\n");
move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, move_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc,
in_gemmk_gemmn_gemmkpack_coord, in_gemmk_gemmn_gemmkpack_coord,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0); in_gemmk_gemmn_gemmkpack_coord_step_0_1_0);
} }
} }
{ {
auto in_gemmk_gemmn_gemmkpack_coord = make_dynamic_tensor_coordinate_v2( auto in_gemmk_gemmn_gemmkpack_coord = make_dynamic_tensor_coordinate(
in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 0)); in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(0, 0, 0));
const auto in_gemmk_gemmn_gemmkpack_coord_step_1_0_0 = const auto in_gemmk_gemmn_gemmkpack_coord_step_1_0_0 = make_dynamic_tensor_coordinate_step(
make_dynamic_tensor_coordinate_step_v2(in_gemmk_gemmn_gemmkpack_global_desc, in_gemmk_gemmn_gemmkpack_global_desc, make_multi_index(1, 0, 0));
make_multi_index(1, 0, 0));
print_array_v2("do_tansforms 1 0 0: ", print_array_v2("do_tansforms 1 0 0: ",
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0.do_transforms_); in_gemmk_gemmn_gemmkpack_coord_step_1_0_0.do_transforms_);
...@@ -120,7 +117,7 @@ void device_dummy_dynamic_transform_v2(InDesc, ...@@ -120,7 +117,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf("offset: %d\n", in_gemmk_gemmn_gemmkpack_coord.GetOffset()); printf("offset: %d\n", in_gemmk_gemmn_gemmkpack_coord.GetOffset());
printf("\n"); printf("\n");
move_dynamic_tensor_coordinate_v2(in_gemmk_gemmn_gemmkpack_global_desc, move_dynamic_tensor_coordinate(in_gemmk_gemmn_gemmkpack_global_desc,
in_gemmk_gemmn_gemmkpack_coord, in_gemmk_gemmn_gemmkpack_coord,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0); in_gemmk_gemmn_gemmkpack_coord_step_1_0_0);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment