Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
506df423
Commit
506df423
authored
Oct 02, 2020
by
Chao Liu
Browse files
refactor
parent
b6bfde53
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1008 additions
and
1014 deletions
+1008
-1014
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
+4
-4
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
+48
-52
composable_kernel/include/tensor_description/dynamic_tensor_coordinate_v1.hpp
...clude/tensor_description/dynamic_tensor_coordinate_v1.hpp
+23
-21
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
.../include/tensor_description/dynamic_tensor_descriptor.hpp
+495
-224
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
...e/tensor_description/dynamic_tensor_descriptor_helper.hpp
+34
-17
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v1.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v1.hpp
+35
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v2.hpp
+0
-52
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v1.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v1.hpp
+341
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+0
-613
driver/include/device_dummy_dynamic_transform_v1.hpp
driver/include/device_dummy_dynamic_transform_v1.hpp
+7
-7
driver/include/device_dummy_dynamic_transform_v2.hpp
driver/include/device_dummy_dynamic_transform_v2.hpp
+21
-24
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
View file @
506df423
...
...
@@ -2,9 +2,9 @@
#define CK_DUMMY_DYNAMIC_TRANSFORM_V1_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "dynamic_tensor_coordinate.hpp"
#include "dynamic_tensor_descriptor
_v1
.hpp"
#include "dynamic_tensor_descriptor_helper
_v1
.hpp"
#include "dynamic_tensor_coordinate
_v1
.hpp"
namespace
ck
{
...
...
@@ -565,7 +565,7 @@ struct DummyDynamicTransform_v1
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
idx
);
make_dynamic_tensor_coordinate
_v1
(
in_gemmk_gemmn_global_desc
,
idx
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
...
...
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
View file @
506df423
...
...
@@ -2,16 +2,16 @@
#define CK_DUMMY_DYNAMIC_TRANSFORM_V2_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor
_v2
.hpp"
#include "dynamic_tensor_descriptor_helper
_v2
.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
namespace
ck
{
template
<
typename
...
Wei
,
typename
...
In
,
typename
...
Out
>
__host__
__device__
constexpr
auto
map_convolution_into_gemm_fwd_v4r4
(
const
DynamicTensorDescriptor
_v2
<
Wei
...
>&
wei_k_c_y_x_global_desc
,
const
DynamicTensorDescriptor
_v2
<
In
...
>&
in_n_c_hi_wi_global_desc
,
const
DynamicTensorDescriptor
_v2
<
Out
...
>&
out_n_k_ho_wo_global_desc
,
__host__
__device__
constexpr
auto
map_convolution_into_gemm_fwd_v4r4
(
const
DynamicTensorDescriptor
<
Wei
...
>&
wei_k_c_y_x_global_desc
,
const
DynamicTensorDescriptor
<
In
...
>&
in_n_c_hi_wi_global_desc
,
const
DynamicTensorDescriptor
<
Out
...
>&
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
in_left_pads
,
...
...
@@ -47,8 +47,8 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
const
index_t
InRightPadW
=
in_right_pads
[
I1
];
// input tensor
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
(
in_n_c_hi_wi_global_desc
,
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
C
},
...
...
@@ -66,7 +66,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
const
index_t
Hip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I2
);
const
index_t
Wip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I3
);
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_hip_wip_global_desc
,
make_tuple
(
DynamicPassThrough
{
N
},
...
...
@@ -76,7 +76,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
const
auto
in_gemmktotal_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_gemmktotal_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_y_ho_x_wo_global_desc
,
make_tuple
(
DynamicMerge
<
3
>
{
make_multi_index
(
C
,
Y
,
X
)},
DynamicMerge
<
3
>
{
make_multi_index
(
N
,
Ho
,
Wo
)}),
...
...
@@ -89,7 +89,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
constexpr
index_t
GemmKPack
=
8
;
const
index_t
gemmk
=
gemmktotal
/
GemmKPack
;
const
auto
in_gemmk_gemmn_gemmkpack_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_gemmk_gemmn_gemmkpack_global_desc
=
transform_dynamic_tensor_descriptor
(
in_gemmktotal_gemmn_global_desc
,
make_tuple
(
DynamicUnMerge
<
2
>
{
make_multi_index
(
gemmk
,
GemmKPack
)},
DynamicPassThrough
{
gemmn
}),
...
...
@@ -105,9 +105,9 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
#if 0
template <typename... Wei, typename... In, typename... Out>
__host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
const DynamicTensorDescriptor
_v2
<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor
_v2
<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor
_v2
<Out...>& out_n_k_ho_wo_global_desc,
const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor<Out...>& out_n_k_ho_wo_global_desc,
const MultiIndex<2> conv_strides,
const MultiIndex<2> conv_dilations,
const MultiIndex<2> in_left_pads,
...
...
@@ -148,7 +148,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
constexpr bool out_skip_out_of_bound_check = true;
#endif
constexpr
auto
out_n_k_ydot_htilda_xdot_wtilda_global_desc
=
transform_tensor_descriptor
_v2
(
constexpr
auto
out_n_k_ydot_htilda_xdot_wtilda_global_desc
=
transform_tensor_descriptor
(
out_n_k_ho_wo_global_desc
,
make_tuple
(
PassThrough
{
N
},
PassThrough
{
K
},
...
...
@@ -158,7 +158,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
constexpr
auto
out_n_k_ydot_htildaslice_xdot_wtildaslice_global_desc
=
transform_tensor_descriptor
_v2
(
transform_tensor_descriptor
(
out_n_k_ydot_htilda_xdot_wtilda_global_desc
,
make_tuple
(
PassThrough
{
N
},
PassThrough
{
K
},
...
...
@@ -230,15 +230,14 @@ struct DummyDynamicTransform_v2_1
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
move_dynamic_tensor_coordinate
_v2
(
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
// write
...
...
@@ -308,7 +307,7 @@ struct DummyDynamicTransform_v2_1
const
index_t
InRightPadW
=
in_right_pads
[
i1
];
#if 0
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor
_v2
(
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
move(in_n_c_hi_wi_global_desc),
make_tuple(DynamicPassThrough{N},
DynamicPassThrough{C},
...
...
@@ -317,7 +316,7 @@ struct DummyDynamicTransform_v2_1
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
#elif
0
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
move
(
in_n_c_hi_wi_global_desc
),
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
C
},
...
...
@@ -326,8 +325,8 @@ struct DummyDynamicTransform_v2_1
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}));
#else
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
(
move
(
in_n_c_hi_wi_global_desc
),
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
C
},
...
...
@@ -351,14 +350,14 @@ struct DummyDynamicTransform_v2_1
#if 1
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_coord
=
make_dynamic_tensor_coordinate
_v2
(
in_n_c_hip_wip_global_desc
,
idx
);
auto
in_coord
=
make_dynamic_tensor_coordinate
(
in_n_c_hip_wip_global_desc
,
idx
);
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step
_v2
(
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step
(
in_n_c_hip_wip_global_desc
,
make_multi_index
(
1
,
0
,
0
,
0
));
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
move_dynamic_tensor_coordinate
_v2
(
in_n_c_hip_wip_global_desc
,
in_coord
,
in_coord_step
);
move_dynamic_tensor_coordinate
(
in_n_c_hip_wip_global_desc
,
in_coord
,
in_coord_step
);
// write
float
value
=
1
;
...
...
@@ -381,7 +380,7 @@ struct DummyDynamicTransform_v2_1
}
#else
// write
// auto in_coord = make_dynamic_tensor_coordinate
_v2
(in_n_c_hi_wi_global_desc, idx);
// auto in_coord = make_dynamic_tensor_coordinate(in_n_c_hi_wi_global_desc, idx);
p_out_global
[
in_n_c_hip_wip_global_desc
.
CalculateOffset
(
idx
)]
=
1
;
#endif
...
...
@@ -429,24 +428,21 @@ struct DummyDynamicTransform_v2_fwd_v4r4
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
idx
);
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
1
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
1
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
1
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
1
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
1
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
1
,
0
,
0
));
// move (0, 0, 1)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
);
...
...
@@ -478,7 +474,7 @@ struct DummyDynamicTransform_v2_fwd_v4r4
// move (0, 1, 0)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
);
...
...
@@ -510,7 +506,7 @@ struct DummyDynamicTransform_v2_fwd_v4r4
// move (1, 0, 0)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
);
...
...
composable_kernel/include/tensor_description/dynamic_tensor_coordinate.hpp
→
composable_kernel/include/tensor_description/dynamic_tensor_coordinate
_v1
.hpp
View file @
506df423
#ifndef CK_DYNAMIC_TENSOR_COORDINATE_HPP
#define CK_DYNAMIC_TENSOR_COORDINATE_HPP
#ifndef CK_DYNAMIC_TENSOR_COORDINATE_
V1_
HPP
#define CK_DYNAMIC_TENSOR_COORDINATE_
V1_
HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor
_v1
.hpp"
namespace
ck
{
...
...
@@ -19,20 +19,20 @@ namespace ck {
// 1. Given step size in each dimension, update itself, or return a new tensor cooridnate, so user
// can freely move the "point of location" inside the tensor
// wrapper class for DynamicNativeTensorCoordinate and DynamicTransformedTensorCoordinate
// wrapper class for DynamicNativeTensorCoordinate
_v1
and DynamicTransformedTensorCoordinate
_v1
template
<
typename
TensorDesc
>
struct
DynamicTensorCoordinate
;
struct
DynamicTensorCoordinate
_v1
;
// tensor coordinate for native tensor
template
<
typename
TensorDesc
>
struct
DynamicNativeTensorCoordinate
struct
DynamicNativeTensorCoordinate
_v1
{
using
type
=
DynamicNativeTensorCoordinate
;
using
type
=
DynamicNativeTensorCoordinate
_v1
;
using
tensor_desc_type
=
TensorDesc
;
static
constexpr
index_t
NDim
=
tensor_desc_type
::
GetNumOfDimension
();
using
Index
=
MultiIndex
<
NDim
>
;
__host__
__device__
explicit
constexpr
DynamicNativeTensorCoordinate
(
__host__
__device__
explicit
constexpr
DynamicNativeTensorCoordinate
_v1
(
const
tensor_desc_type
&
tensor_desc
,
const
Index
&
idx
)
:
tensor_desc_
{
tensor_desc
},
idx_
{
idx
},
offset_
{
tensor_desc
.
CalculateOffset
(
idx
)}
{
...
...
@@ -118,17 +118,17 @@ struct DynamicNativeTensorCoordinate
// tensor coordinate for transformed tensor
template
<
typename
TensorDesc
>
struct
DynamicTransformedTensorCoordinate
struct
DynamicTransformedTensorCoordinate
_v1
{
static
constexpr
index_t
NDimUp
=
TensorDesc
::
GetNumOfDimension
();
using
UpperDesc
=
TensorDesc
;
using
UpperCoord
=
DynamicTransformedTensorCoordinate
;
using
UpperCoord
=
DynamicTransformedTensorCoordinate
_v1
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
LowerDesc
=
typename
UpperDesc
::
LowerDesc
;
using
LowerCoord
=
typename
DynamicTensorCoordinate
<
LowerDesc
>::
type
;
using
LowerCoord
=
typename
DynamicTensorCoordinate
_v1
<
LowerDesc
>::
type
;
__host__
__device__
explicit
constexpr
DynamicTransformedTensorCoordinate
(
__host__
__device__
explicit
constexpr
DynamicTransformedTensorCoordinate
_v1
(
const
UpperDesc
&
tensor_desc_up
,
const
UpperIndex
&
idx_up
)
:
tensor_desc_up_
{
tensor_desc_up
},
idx_up_
{
idx_up
},
...
...
@@ -240,30 +240,32 @@ struct DynamicTransformedTensorCoordinate
template
<
index_t
NDim
>
__host__
__device__
constexpr
auto
make_dynamic_tensor_coordinate
(
const
DynamicNativeTensorDescriptor
<
NDim
>&
tensor_desc
,
make_dynamic_tensor_coordinate
_v1
(
const
DynamicNativeTensorDescriptor
_v1
<
NDim
>&
tensor_desc
,
const
MultiIndex
<
NDim
>&
idx
)
{
return
DynamicNativeTensorCoordinate
<
DynamicNativeTensorDescriptor
<
NDim
>>
{
tensor_desc
,
idx
};
return
DynamicNativeTensorCoordinate_v1
<
DynamicNativeTensorDescriptor_v1
<
NDim
>>
{
tensor_desc
,
idx
};
}
template
<
index_t
NDim
,
typename
...
Ts
>
__host__
__device__
constexpr
auto
make_dynamic_tensor_coordinate
(
const
DynamicTransformedTensorDescriptor
<
Ts
...
>&
tensor_desc
,
make_dynamic_tensor_coordinate
_v1
(
const
DynamicTransformedTensorDescriptor
_v1
<
Ts
...
>&
tensor_desc
,
const
MultiIndex
<
NDim
>&
idx
)
{
static_assert
(
DynamicTransformedTensorDescriptor
<
Ts
...
>::
GetNumOfDimension
()
==
NDim
,
static_assert
(
DynamicTransformedTensorDescriptor
_v1
<
Ts
...
>::
GetNumOfDimension
()
==
NDim
,
"wrong! inconsistent # of dimensions"
);
return
DynamicTransformedTensorCoordinate
<
DynamicTransformedTensorDescriptor
<
Ts
...
>>
{
return
DynamicTransformedTensorCoordinate
_v1
<
DynamicTransformedTensorDescriptor
_v1
<
Ts
...
>>
{
tensor_desc
,
idx
};
}
template
<
typename
TensorDesc
>
struct
DynamicTensorCoordinate
struct
DynamicTensorCoordinate
_v1
{
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
using
type
=
decltype
(
make_dynamic_tensor_coordinate
<
NDim
>
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
using
type
=
decltype
(
make_dynamic_tensor_coordinate_v1
<
NDim
>
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
};
}
// namespace ck
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
View file @
506df423
...
...
@@ -6,336 +6,607 @@
namespace
ck
{
template
<
index_t
NDim
>
struct
DynamicNativeTensorDescriptor
template
<
index_t
NDimHidden
,
typename
VisibleDimensionIds
>
struct
DynamicTensorCoordinate
;
template
<
index_t
NTransform
,
index_t
NDimVisible
>
struct
DynamicTensorCoordinateStep
;
template
<
typename
TensorDesc
,
typename
VisibleIndex
>
__host__
__device__
constexpr
auto
make_dynamic_tensor_coordinate
(
const
TensorDesc
&
tensor_desc
,
const
VisibleIndex
&
idx_visible
);
template
<
typename
TensorDesc
,
typename
VisibleIndex
>
__host__
__device__
constexpr
auto
make_dynamic_tensor_coordinate_step
(
const
TensorDesc
&
,
const
VisibleIndex
&
idx_diff_visible
);
template
<
typename
TensorDesc
,
typename
TensorCoord
,
typename
TensorCoordStep
>
__host__
__device__
void
move_dynamic_tensor_coordinate
(
const
TensorDesc
&
tensor_desc
,
TensorCoord
&
coord
,
const
TensorCoordStep
&
coord_step
);
template
<
typename
TensorDesc
,
typename
TensorCoord
>
__host__
__device__
constexpr
bool
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
const
TensorDesc
&
tensor_desc
,
const
TensorCoord
&
coord
);
template
<
typename
TensorDesc
,
typename
TensorCoord
>
__host__
__device__
constexpr
bool
coordinate_has_valid_offset
(
const
TensorDesc
&
tensor_desc
,
const
TensorCoord
&
coord
);
// Transforms: Tuple<transforms...>
// LowerDimensionIdss : Tuple<Sequence<...>, ...>
// UpperDimensionIdss : Tuple<Sequence<...>, ...>
// VisibleDimensionIds> : Sequence<...>
template
<
typename
Transforms
,
typename
LowerDimensionIdss
,
typename
UpperDimensionIdss
,
typename
VisibleDimensionIds
>
struct
DynamicTensorDescriptor
{
using
Index
=
MultiIndex
<
NDim
>
;
// private:
__host__
__device__
static
constexpr
index_t
GetNumOfTransform
()
{
return
Transforms
::
Size
();
}
const
Index
lengths_
;
const
Index
strides_
;
__host__
__device__
explicit
constexpr
DynamicNativeTensorDescriptor
(
const
Index
&
lengths
,
const
Index
&
strides
)
:
lengths_
{
lengths
},
strides_
{
strides
}
__host__
__device__
static
constexpr
index_t
GetNumOfVisibleDimension
()
{
return
VisibleDimensionIds
::
Size
();
}
__host__
__device__
explicit
constexpr
DynamicNativeTensorDescriptor
()
:
lengths_
{
make_zero_multi_index
<
NDim
>
()},
strides_
{
make_zero_multi_index
<
NDim
>
()}
__host__
__device__
static
constexpr
index_t
GetNumOfHiddenDimension
()
{
}
constexpr
auto
all_low_dim_ids
=
unpack
([](
auto
&&
...
xs
)
constexpr
{
return
merge_sequences
(
xs
...);
},
LowerDimensionIdss
{});
__host__
__device__
static
constexpr
index_t
GetNumOfDimension
()
{
return
NDim
;
}
constexpr
auto
all_up_dim_ids
=
unpack
([](
auto
&&
...
xs
)
constexpr
{
return
merge_sequences
(
xs
...);
},
UpperDimensionIdss
{});
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
lengths_
;
}
constexpr
auto
all_dim_ids
=
merge_sequences
(
all_low_dim_ids
,
all_up_dim_ids
);
__host__
__device__
constexpr
auto
GetStrides
()
const
{
return
strides_
;
}
using
unique_sort_all_dim_ids
=
typename
sequence_unique_sort
<
decltype
(
all_dim_ids
),
math
::
less
<
index_t
>
,
math
::
equal
<
index_t
>>::
type
;
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
{
return
lengths_
[
Number
<
IDim
>
{}];
return
unique_sort_all_dim_ids
::
Size
();
}
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetStride
(
Number
<
IDim
>
)
const
constexpr
static
index_t
ntransform_
=
GetNumOfTransform
();
constexpr
static
index_t
ndim_visible_
=
GetNumOfVisibleDimension
();
constexpr
static
index_t
ndim_hidden_
=
GetNumOfHiddenDimension
();
using
VisibleIndex
=
MultiIndex
<
ndim_visible_
>
;
using
HiddenIndex
=
MultiIndex
<
ndim_hidden_
>
;
using
Coordinate
=
DynamicTensorCoordinate
<
ndim_hidden_
,
VisibleDimensionIds
>
;
using
CoordinateStep
=
DynamicTensorCoordinateStep
<
ntransform_
,
ndim_visible_
>
;
// public:
__host__
__device__
explicit
constexpr
DynamicTensorDescriptor
(
const
Transforms
&
transforms
,
index_t
element_space_size
)
:
transforms_
{
transforms
},
hidden_lengths_
{
InitializeHiddenLengths
(
transforms_
,
element_space_size
)},
visible_lengths_
{
hidden_lengths_
}
{
return
strides_
[
Number
<
IDim
>
{}];
static_assert
(
Transforms
::
Size
()
==
ntransform_
&&
LowerDimensionIdss
::
Size
()
==
ntransform_
&&
UpperDimensionIdss
::
Size
()
==
ntransform_
,
"wrong! inconsistent # of transformations"
);
// TODO check dependency of dimensions is valid
}
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
explicit
constexpr
DynamicTensorDescriptor
()
:
DynamicTensorDescriptor
(
Transforms
{},
index_t
{
0
})
{
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
__host__
__device__
constexpr
index_t
Get
ElementSpace
()
const
__host__
__device__
static
constexpr
index_t
Get
NumOfDimension
()
{
index_t
space
=
1
;
static_for
<
0
,
NDim
,
1
>
{}([
&
](
auto
i
)
{
space
+=
(
GetLength
(
i
)
-
1
)
*
GetStride
(
i
);
});
return
space
;
return
GetNumOfVisibleDimension
();
}
template
<
typename
Idx
>
__host__
__device__
constexpr
index_t
CalculateOffset
(
const
Idx
&
idx
)
const
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
{
index_t
offset
=
0
;
return
visible_lengths_
[
Number
<
IDim
>
{}];
}
static_for
<
0
,
NDim
,
1
>
{}([
&
](
auto
i
)
{
offset
+=
idx
[
i
]
*
GetStride
(
i
)
;
}
);
__host__
__device__
constexpr
const
auto
&
GetLengths
()
const
{
return
visible_lengths_
;
}
return
offset
;
// maybe this result should be saved as a member variable
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
template
<
typename
IdxDiff
>
__host__
__device__
constexpr
index_t
CalculateOffsetDiff
(
const
IdxDiff
&
idx_diff
)
const
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
{
return
CalculateOffset
(
idx_diff
)
;
return
hidden_lengths_
[
Number
<
0
>
{}]
;
}
template
<
typename
Idx
>
__host__
__device__
constexpr
bool
IsUpperIndexValid
(
const
Idx
&
idx
)
const
__host__
__device__
constexpr
index_t
CalculateOffset
(
const
Idx
&
idx
)
const
{
bool
flag
=
true
;
static_for
<
0
,
NDim
,
1
>
{}(
[
&
](
auto
i
)
{
flag
=
flag
&&
idx
[
i
]
>=
0
&&
idx
[
i
]
<
GetLength
(
i
);
});
static_assert
(
Idx
::
Size
()
==
GetNumOfDimension
(),
"wrong! inconsistent # of dimension"
);
return
flag
;
}
};
#if 0 // debug
return make_dynamic_tensor_coordinate(*this, idx).GetOffset();
#else
constexpr
index_t
ntransform
=
GetNumOfTransform
();
constexpr
index_t
ndim_hidden
=
GetNumOfHiddenDimension
();
constexpr
index_t
ndim_visible
=
GetNumOfVisibleDimension
();
constexpr
auto
visible_dim_ids
=
GetVisibleDimensionIds
();
template
<
typename
LowTensorDescriptor
,
// DynamicNativeTensorDescriptor or
// DynamicTransformedTensorDescriptor
typename
Transforms
,
// Tuple<MultIndexTransforms...>
typename
LowDimensionIds
,
// Tuple<Sequence<...>>
typename
UpDimensionIds
>
// Tuple<Sequence<...>>
struct
DynamicTransformedTensorDescriptor
{
using
LowerDesc
=
LowTensorDescriptor
;
using
UpperDesc
=
DynamicTransformedTensorDescriptor
;
MultiIndex
<
ndim_hidden
>
idx_hidden
;
static
constexpr
index_t
NTransform
=
Transforms
::
Size
();
const
LowerDesc
low_tensor_desc_
;
const
Transforms
transforms_
;
// initialize visible index
auto
idx_hidden_pick_visible
=
pick_container_element
(
idx_hidden
,
visible_dim_ids
)
;
idx_hidden_pick_visible
=
idx
;
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
LowerDesc
::
GetNumOfDimension
();
}
// calculate hidden index
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
this
,
&
idx_hidden
](
auto
itran
)
{
const
auto
&
tran
=
this
->
GetTransforms
().
At
(
itran
);
constexpr
auto
dims_low
=
GetLowerDimensionIdss
().
At
(
itran
);
constexpr
auto
dims_up
=
GetUpperDimensionIdss
().
At
(
itran
);
__host__
__device__
static
constexpr
index_t
GetNumOfUpperDimension
()
{
index_t
ndim_up
=
0
;
const
auto
idx_up
=
pick_container_element
(
idx_hidden
,
dims_up
);
auto
idx_low
=
pick_container_element
(
idx_hidden
,
dims_low
);
static_for
<
0
,
NTransform
,
1
>
{}([
&
](
auto
i
)
constexpr
{
constexpr
auto
tmp
=
UpDimensionIds
{}.
At
(
i
);
ndim_up
+=
decltype
(
tmp
)
::
Size
();
tran
.
CalculateLowerIndex
(
idx_low
,
idx_up
);
});
return
ndim_up
;
return
idx_hidden
[
Number
<
0
>
{}];
#endif
}
static
constexpr
index_t
NDimUp
=
GetNumOfUpperDimension
();
static
constexpr
index_t
NDimLow
=
GetNumOfLowerDimension
();
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
LowerIndex
=
MultiIndex
<
NDimLow
>
;
// private:
__host__
__device__
constexpr
const
auto
&
GetTransforms
()
const
{
return
transforms_
;
}
struct
lambda_merge_sequences
{
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
__host__
__device__
static
constexpr
auto
GetLowerDimensionIdss
()
{
return
merge_sequences
(
xs
...)
;
return
LowerDimensionIdss
{}
;
}
};
struct
lambda_merge_arrays
__host__
__device__
static
constexpr
auto
GetUpperDimensionIdss
()
{
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
return
UpperDimensionIdss
{};
}
__host__
__device__
static
constexpr
auto
GetVisibleDimensionIds
()
{
return
container_cat
(
xs
...)
;
return
VisibleDimensionIds
{}
;
}
};
__host__
__device__
explicit
constexpr
DynamicTransformedTensorDescriptor
(
const
LowerDesc
&
low_tensor_desc
,
const
Transforms
&
transforms
)
:
low_tensor_desc_
{
low_tensor_desc
},
transforms_
{
transforms
}
__host__
__device__
static
constexpr
auto
InitializeHiddenLengths
(
const
Transforms
&
transforms
,
index_t
element_space_size
)
{
static_assert
(
NTransform
==
Transforms
::
Size
()
&&
NTransform
==
LowDimensionIds
::
Size
()
&&
NTransform
==
UpDimensionIds
::
Size
(),
"wrong! # of transformations not the same"
);
// zero initialization
HiddenIndex
hidden_lengths
=
make_zero_multi_index
<
ndim_hidden_
>
();
// sanity check:
// LowDimensionIds should include all low-dimensions,
// UpDimensionIds should include all up-dimensions
using
unsorted_up_dimension_ids
=
decltype
(
unpack
(
lambda_merge_sequences
{},
UpDimensionIds
{}));
// this is the orignal tensor element space size
hidden_lengths
(
Number
<
0
>
{})
=
element_space_size
;
using
sorted_up_dimension_ids
=
typename
sequence_sort
<
unsorted_up_dimension_ids
,
math
::
less
<
index_t
>>::
type
;
// lengths for all other hidden dimensions
static_for
<
0
,
ntransform_
,
1
>
{}([
&
transforms
,
&
hidden_lengths
](
auto
itran
)
{
const
auto
&
tran
=
transforms
.
At
(
itran
);
static_assert
(
sorted_up_dimension_ids
::
Size
()
==
NDimUp
&&
is_valid_sequence_map
<
sorted_up_dimension_ids
>
{},
"wrong! UpDimensionIds is not configured correctly"
);
constexpr
auto
up_dim_ids
=
UpperDimensionIdss
{}.
At
(
itran
);
using
unsorted_low_dimension_ids
=
decltype
(
unpack
(
lambda_merge_sequences
{},
LowDimensionIds
{})
);
// lengths_hidden_pick_up contains a reference to lengths_hidden
auto
hidden_lengths_pick_up
=
pick_container_element
(
hidden_lengths
,
up_dim_ids
);
using
sorted_low_dimension_ids
=
typename
sequence_sort
<
unsorted_low_dimension_ids
,
math
::
less
<
index_t
>>::
type
;
hidden_lengths_pick_up
=
tran
.
GetUpperLengths
();
})
;
static_assert
(
sorted_low_dimension_ids
::
Size
()
==
NDimLow
&&
is_valid_sequence_map
<
sorted_low_dimension_ids
>
{},
"wrong! LowDimensionIds is not configured correctly"
);
return
hidden_lengths
;
}
// TODO: sanity check: while a up-dimension could be associated with
// multille
// transformation, a low-dimension should be associated with only one
// transformation
// private member variables
const
Transforms
transforms_
;
// TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
// variable lengths_) to save space on stack?
const
HiddenIndex
hidden_lengths_
;
// visible_lenths_ contains a reference to hidden_lengths_
const
ContainerElementPicker
<
const
HiddenIndex
,
VisibleDimensionIds
>
visible_lengths_
;
#if 0
// friend class
friend Coordinate;
friend CoordinateStep;
// friend function to transform tensor descriptor
template <typename OldTensorDescriptor,
typename NewTransforms,
typename NewLowerDimensionOldVisibleIdss,
typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ friend constexpr auto
transform_dynamic_tensor_descriptor(const OldTensorDescriptor& /* old_tensor_desc */,
const NewTransforms& /* new_transforms */,
NewLowerDimensionOldVisibleIdss,
NewUpperDimensionNewVisibleIdss);
// friend functions for making and moving tensor coordinate
template <typename VisibleIndex>
__host__ __device__ friend constexpr Coordinate
make_dynamic_tensor_coordinate(const DynamicTensorDescriptor& /* tensor_desc */,
const VisibleIndex& /* idx_visible */);
template <typename VisibleIndex>
__host__ __device__ friend constexpr CoordinateStep
make_dynamic_tensor_coordinate_step(const DynamicTensorDescriptor& /* tensor_desc */,
const VisibleIndex& /* idx_diff_visible */);
__host__ __device__ friend void
move_dynamic_tensor_coordinate(const DynamicTensorDescriptor& /* tensor_desc */,
Coordinate& /* coord */,
const CoordinateStep& /* coord_step */);
// friend functions for valid offset check
__host__ __device__ friend constexpr bool
coordinate_has_valid_offset_assuming_visible_index_is_valid(
const DynamicTensorDescriptor& tensor_desc, const Coordinate& coord);
__host__ __device__ friend constexpr bool
coordinate_has_valid_offset(const DynamicTensorDescriptor& tensor_desc,
const Coordinate& coord);
#endif
};
// TODO: sanity-check: GetLowerLengths of each transform should be
// consistent with lengths
// of lower-tensor-descriptor
}
// Tensor coordinate: a hidden multi-index of size NDimHidden, of which the
// externally visible dimensions (VisibleDimensionIds) form a view.
// Hidden dimension 0 holds the linear offset (see GetOffset()).
template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicTensorCoordinate
{
    // private:
    // number of externally visible dimensions
    static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();

    using HiddenIndex  = MultiIndex<NDimHidden>;
    using VisibleIndex = MultiIndex<ndim_visible_>;

    // public:
    // construct from a fully populated hidden index; the visible view is
    // re-bound to the freshly copied idx_hidden_ member
    __host__ __device__ explicit constexpr DynamicTensorCoordinate(const HiddenIndex& idx_hidden)
        : idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_}
    {
    }

    __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }

    // hidden dimension 0 is the linear memory offset
    __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }

    // private:
    __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }

    __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }

    __host__ __device__ constexpr const auto& GetVisibleIndex() const { return idx_visible_; }

    __host__ __device__ auto& GetVisibleIndex() { return idx_visible_; }

    // private member variables
    HiddenIndex idx_hidden_;
    // idx_visible_ contains a reference to idx_hidden_
    ContainerElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;

#if 0
    // friend functions for making and updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinate
    make_dynamic_tensor_coordinate(const TensorDesc& /* tensor_desc */,
                                   const VisibleIndex& /* idx_visible */);

    template <typename TensorDesc, typename TensorCoordStep>
    __host__ __device__ friend void move_dynamic_tensor_coordinate(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate& /* coord */,
        const TensorCoordStep& /* coord_step */);
#endif
};
template
<
index_t
NTransform
,
index_t
NDimVisible
>
struct
DynamicTensorCoordinateStep
{
// private:
using
VisibleIndex
=
MultiIndex
<
NDimVisible
>
;
// public:
__host__
__device__
explicit
constexpr
DynamicTensorCoordinateStep
(
const
VisibleIndex
&
idx_diff_visible
,
const
Array
<
bool
,
NTransform
>&
do_transforms
)
:
idx_diff_visible_
{
idx_diff_visible
},
do_transforms_
{
do_transforms
}
{
return
GetLengths
()[
Number
<
IDim
>
{}];
}
__host__
__device__
constexpr
index_t
GetElementSize
()
const
__host__
__device__
constexpr
const
auto
&
GetIndexDiff
()
const
{
return
GetVisibleIndexDiff
();
}
// private:
__host__
__device__
constexpr
const
auto
&
GetVisibleIndexDiff
()
const
{
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
})
;
return
idx_diff_visible_
;
}
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
// private:
const
VisibleIndex
idx_diff_visible_
;
const
Array
<
bool
,
NTransform
>
do_transforms_
;
#if 0
// friend functions for updating tensor coordinate
template <typename TensorDesc>
__host__ __device__ friend constexpr DynamicTensorCoordinateStep
make_dynamic_tensor_coordinate_step(const TensorDesc& /* tensor_desc */,
const VisibleIndex& /* idx_visible */);
template <typename TensorDesc, index_t NDimHidden, typename VisibleDimensionIds>
__host__ __device__ friend void move_dynamic_tensor_coordinate(
const TensorDesc& /* tensor_desc */,
DynamicTensorCoordinate<NDimHidden, VisibleDimensionIds>& /* coord */,
const DynamicTensorCoordinateStep& /* coord_step */);
#endif
};
// TODO: How to fix this? It uses a struct instead of a lambda because lambdas
// don't have constructors, and it must live outside the scope where it is used
// (transform_dynamic_tensor_descriptor) because a template cannot be defined
// inside a function template.
//
// Maps a transform index I to the number of upper dimensions that transform
// produces, wrapped in Number<> so it can feed generate_sequence.
template <typename NewTransforms>
struct lambda_get_up_dim_num
{
    template <typename I>
    __host__ __device__ constexpr auto operator()(I) const
    {
        using Tran = remove_reference_t<decltype(NewTransforms{}.At(I{}))>;
        return Number<Tran::GetNumOfUpperDimension()>{};
    }
};
// Build a new DynamicTensorDescriptor by appending new_transforms on top of
// old_tensor_desc. Lower ids of the new transforms are given in terms of the
// old descriptor's *visible* dimensions and are remapped to hidden ids; the
// new transforms' upper dimensions get freshly allocated hidden ids.
template <typename OldTensorDescriptor,
          typename NewTransforms,
          typename NewLowerDimensionOldVisibleIdss,
          typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,
                                    const NewTransforms& new_transforms,
                                    NewLowerDimensionOldVisibleIdss,
                                    NewUpperDimensionNewVisibleIdss)
{
    // lower dimension's hidden idss:
    // convert lower-dimension visible idss (tuple of sequences) to hidden idss
    constexpr auto low_dim_hidden_idss = transform_tuples(
        // convert one transform's lower visible ids (a sequence) to hidden ids
        [](auto low_dim_visible_ids) constexpr {
            return transform_sequences(
                // convert a single lower visible id to its hidden id
                [](auto low_dim_visible_id) constexpr {
                    return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
                },
                low_dim_visible_ids);
        },
        NewLowerDimensionOldVisibleIdss{});

    constexpr index_t num_new_transform = NewTransforms::Size();

    // upper dimension's hidden idss: allocate fresh ids after the old hidden ids
    constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();

    constexpr auto up_dim_numbers =
        generate_sequence(lambda_get_up_dim_num<NewTransforms>{}, Number<num_new_transform>{});

    // exclusive prefix: up_dim_numbers_scan[i] is the first new id offset of transform i
    constexpr auto up_dim_numbers_scan = merge_sequences(
        Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));

    constexpr auto up_dim_hidden_idss =
        generate_tuple([old_hidden_dim_number, up_dim_numbers_scan](auto i) constexpr {
            return
                typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
                                                 old_hidden_dim_number + up_dim_numbers_scan[i + 1],
                                                 1>::type{};
        }, Number<num_new_transform>{});

    // new visible dimension's hidden ids, reordered into user-requested order
    constexpr auto unordered_new_visible_dim_hidden_ids =
        unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);

    constexpr auto new_visible_dim_unordered2ordered =
        unpack([](auto... xs) { return merge_sequences(xs...); },
               NewUpperDimensionNewVisibleIdss{});

    constexpr auto new_visible_dim_hidden_ids =
        unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);

    // put everything together
    const auto all_transforms = container_cat(old_tensor_desc.GetTransforms(), new_transforms);

    constexpr auto all_low_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);

    constexpr auto all_up_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);

    return DynamicTensorDescriptor<decltype(all_transforms),
                                   decltype(all_low_dim_hidden_idss),
                                   decltype(all_up_dim_hidden_idss),
                                   decltype(new_visible_dim_hidden_ids)>{
        all_transforms, old_tensor_desc.GetElementSpaceSize()};
}
// Create a tensor coordinate from a visible index: copy the visible index into
// the hidden index, then run every transform from top to bottom to fill in the
// remaining (lower) hidden components, down to the offset in hidden dim 0.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto make_dynamic_tensor_coordinate(const TensorDesc& tensor_desc,
                                                                  const VisibleIndex& idx_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    MultiIndex<ndim_hidden> idx_hidden;

    // initialize visible index (idx_hidden_pick_visible references idx_hidden)
    auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);
    idx_hidden_pick_visible      = idx_visible;

    // calculate the rest of the hidden index, walking transforms top-down
    static_for<ntransform, 0, -1>{}([&tensor_desc, &idx_hidden](auto itran_p1) {
        auto itran = itran_p1 - Number<1>{};

        const auto& tran        = tensor_desc.GetTransforms().At(itran);
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto idx_up = pick_container_element(idx_hidden, dims_up);
        auto idx_low      = pick_container_element(idx_hidden, dims_low);

        tran.CalculateLowerIndex(idx_low, idx_up);
    });

    // better to use std::move?
    return DynamicTensorCoordinate<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
}
// Precompute a coordinate step for idx_diff_visible: propagate the "has a
// non-zero diff" flag from visible dimensions down through the transform
// chain, recording per transform whether it will need to be re-evaluated.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step(const TensorDesc&, const VisibleIndex& idx_diff_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();
    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    Array<bool, ntransform> do_transforms{false};
    Array<bool, ndim_hidden> non_zero_diff{false};

    // view over the visible slots of non_zero_diff
    auto non_zero_diff_pick_visible = pick_container_element(non_zero_diff, visible_dim_ids);

    // seed: a visible dimension is "dirty" iff its diff component is non-zero
    static_for<0, ndim_visible, 1>{}([&non_zero_diff_pick_visible, &idx_diff_visible](auto i) {
        non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0);
    });

    // propagate dirtiness from upper to lower dimensions, top transform first
    static_for<ntransform - 1, -1, -1>{}([&do_transforms, &non_zero_diff](auto itran) {
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto non_zero_diff_pick_up = pick_container_element(non_zero_diff, dims_up);
        auto non_zero_diff_pick_low      = pick_container_element(non_zero_diff, dims_low);

        // if any of the upper index diff components is non-zero, then
        // 1) this transform needs to be done when moving the coordinate
        // 2) all components of the lower index diff are assumed non-zero
        const bool idx_diff_up_has_non_zero = container_reduce(
            non_zero_diff_pick_up, [](auto a, auto b) { return a or b; }, false);

        do_transforms(itran) = idx_diff_up_has_non_zero;

        static_for<0, dims_low.Size(), 1>{}(
            [&non_zero_diff_pick_low, &idx_diff_up_has_non_zero](auto i) {
                non_zero_diff_pick_low(i) = idx_diff_up_has_non_zero;
            });
    });

    return DynamicTensorCoordinateStep<ntransform, ndim_visible>{idx_diff_visible, do_transforms};
}
// Apply a precomputed step to a coordinate: add the visible diff, then walk
// transforms top-down, re-evaluating only those flagged in coord_step and
// accumulating the resulting lower-index diffs into the hidden index.
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ void move_dynamic_tensor_coordinate(const TensorDesc& tensor_desc,
                                                        TensorCoord& coord,
                                                        const TensorCoordStep& coord_step)
{
    constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ntransform  = TensorDesc::GetNumOfTransform();

    // this is what needs to be calculated: diff of the whole hidden index
    auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();

    // initialize visible index diff
    // (idx_diff_hidden_pick_visible contains a reference to idx_diff_hidden)
    auto idx_diff_hidden_pick_visible =
        pick_container_element(idx_diff_hidden, TensorDesc::GetVisibleDimensionIds());
    idx_diff_hidden_pick_visible = coord_step.GetVisibleIndexDiff();

    // this is what needs to be updated
    auto& idx_hidden = coord.GetHiddenIndex();

    // update visible part of the hidden index
    auto idx_hidden_pick_visible =
        pick_container_element(idx_hidden, TensorDesc::GetVisibleDimensionIds());
    idx_hidden_pick_visible += coord_step.GetIndexDiff();

    // update rest of hidden index, top transform first
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
        // skip transforms that the step marked as unaffected
        if(coord_step.do_transforms_[itran])
        {
            const auto& tran        = tensor_desc.GetTransforms().At(itran);
            constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
            constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

            // this const is for ContainerElementPicker, Array itself may not be const
            const auto idx_up = pick_container_element(idx_hidden, dims_up);
            auto idx_low      = pick_container_element(idx_hidden, dims_low);

            const auto idx_diff_up = pick_container_element(idx_diff_hidden, dims_up);
            auto idx_diff_low      = pick_container_element(idx_diff_hidden, dims_low);

            tran.CalculateLowerIndexDiff(idx_diff_low, idx_diff_up, idx_low, idx_up);

            // update idx_low
            idx_low += idx_diff_low;
        }
    });
}
// Check that the coordinate's hidden index maps to a valid offset, assuming
// the visible index itself is already known to be in range. Only transforms
// that can map a valid upper index to an invalid lower index are checked.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(
    const TensorDesc& tensor_desc, const TensorCoord& coord)
{
    bool valid = true;

    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();

    const auto& idx_hidden = coord.GetHiddenIndex();

    static_for<ntransform - 1, -1, -1>{}([&tensor_desc, &idx_hidden, &valid](auto itran) {
        const auto tran = tensor_desc.GetTransforms().At(itran);

        // check validity only if the current transformation does not always
        // map a valid upper index to a valid lower index
        if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
        {
            const auto idx_up =
                pick_container_element(idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));

            valid = valid && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
        }
    });

    return valid;
}
// Full validity check: the visible index must be within the tensor lengths,
// and the hidden index must pass the per-transform validity check.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord)
{
    // check visible index against lengths
    const auto& idx_visible = coord.GetVisibleIndex();

    bool is_visible_index_valid = true;

    static_for<0, TensorDesc::GetNumOfDimension(), 1>{}(
        [&is_visible_index_valid, &idx_visible, &tensor_desc](auto i) {
            is_visible_index_valid =
                is_visible_index_valid &&
                (idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
        });

    // check the rest of the hidden index
    return is_visible_index_valid &&
           coordinate_has_valid_offset_assuming_visible_index_is_valid(tensor_desc, coord);
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
View file @
506df423
...
...
@@ -6,29 +6,46 @@
namespace
ck
{
template
<
typename
Lengths
,
typename
Strides
>
__host__
__device__
constexpr
auto
make_dynamic_native_tensor_descriptor
(
const
Lengths
&
lengths
,
const
Strides
&
stride
s
)
template
<
index_t
N
>
__host__
__device__
constexpr
auto
make_dynamic_native_tensor_descriptor_packed
(
const
MultiIndex
<
N
>&
length
s
)
{
static_assert
(
Lengths
::
Size
()
==
Strides
::
Size
(),
"wrong! Size not the same"
);
return
DynamicNativeTensorDescriptor
<
Lengths
::
Size
()
>
(
lengths
,
strides
);
const
auto
transforms
=
make_tuple
(
DynamicUnMerge
<
N
>
{
lengths
});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
up_dim_hidden_idss
=
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
const
index_t
element_space_size
=
container_reduce
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
DynamicTensorDescriptor
<
decltype
(
transforms
),
decltype
(
low_dim_hidden_idss
),
decltype
(
up_dim_hidden_idss
),
decltype
(
visible_dim_hidden_ids
)
>
{
transforms
,
element_space_size
};
}
// Create a strided N-dim tensor descriptor: a single DynamicEmbed transform
// mapping dims 1..N onto the offset (hidden dim 0) via the given strides.
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor(const MultiIndex<N>& lengths, const MultiIndex<N>& strides)
{
    const auto transforms = make_tuple(DynamicEmbed<N>{lengths, strides});

    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
    constexpr auto up_dim_hidden_idss =
        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};

    // element space: 1 + sum_i (lengths[i] - 1) * strides[i]
    index_t element_space_size = 1;

    static_for<0, N, 1>{}(
        [&](auto i) { element_space_size += (lengths[i] - 1) * strides[i]; });

    return DynamicTensorDescriptor<decltype(transforms),
                                   decltype(low_dim_hidden_idss),
                                   decltype(up_dim_hidden_idss),
                                   decltype(visible_dim_hidden_ids)>{transforms,
                                                                     element_space_size};
}
}
// namespace ck
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v1.hpp
0 → 100644
View file @
506df423
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V1_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V1_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v1.hpp"
namespace
ck
{
// Convenience factory for a v1 native (lengths/strides) tensor descriptor.
template <typename Lengths, typename Strides>
__host__ __device__ constexpr auto make_dynamic_native_tensor_descriptor_v1(const Lengths& lengths,
                                                                            const Strides& strides)
{
    // lengths and strides must describe the same number of dimensions
    static_assert(Lengths::Size() == Strides::Size(), "wrong! Size not the same");

    return DynamicNativeTensorDescriptor_v1<Lengths::Size()>(lengths, strides);
}
// Convenience factory wrapping a lower descriptor with a set of transforms
// into a v1 transformed tensor descriptor.
template <typename LowTensorDescriptor,
          typename Transforms,
          typename LowDimensionIds,
          typename UpDimensionIds>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor_v1(const LowTensorDescriptor& low_tensor_desc,
                                       const Transforms& transforms,
                                       LowDimensionIds,
                                       UpDimensionIds)
{
    return DynamicTransformedTensorDescriptor_v1<LowTensorDescriptor,
                                                 Transforms,
                                                 LowDimensionIds,
                                                 UpDimensionIds>{low_tensor_desc, transforms};
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
deleted
100644 → 0
View file @
b6bfde53
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v2.hpp"
namespace
ck
{
// v2 packed-descriptor factory: one DynamicUnMerge transform from hidden dim 0
// (the offset) up to visible dims 1..N; element space is the product of lengths.
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
{
    const auto transforms = make_tuple(DynamicUnMerge<N>{lengths});

    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
    constexpr auto up_dim_hidden_idss =
        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};

    const index_t element_space_size =
        container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});

    return DynamicTensorDescriptor_v2<decltype(transforms),
                                      decltype(low_dim_hidden_idss),
                                      decltype(up_dim_hidden_idss),
                                      decltype(visible_dim_hidden_ids)>{transforms,
                                                                        element_space_size};
}
// v2 strided-descriptor factory: one DynamicEmbed transform; element space is
// 1 + sum_i (lengths[i] - 1) * strides[i].
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths,
                                         const MultiIndex<N>& strides)
{
    const auto transforms = make_tuple(DynamicEmbed<N>{lengths, strides});

    constexpr auto low_dim_hidden_idss = make_tuple(Sequence<0>{});
    constexpr auto up_dim_hidden_idss =
        make_tuple(typename arithmetic_sequence_gen<1, N + 1, 1>::type{});
    constexpr auto visible_dim_hidden_ids = typename arithmetic_sequence_gen<1, N + 1, 1>::type{};

    index_t element_space_size = 1;

    static_for<0, N, 1>{}(
        [&](auto i) { element_space_size += (lengths[i] - 1) * strides[i]; });

    return DynamicTensorDescriptor_v2<decltype(transforms),
                                      decltype(low_dim_hidden_idss),
                                      decltype(up_dim_hidden_idss),
                                      decltype(visible_dim_hidden_ids)>{transforms,
                                                                        element_space_size};
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v1.hpp
0 → 100644
View file @
506df423
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_V1_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_V1_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace
ck
{
// Plain strided NDim tensor descriptor: runtime lengths and strides, linear
// offset = sum_i idx[i] * strides_[i].
template <index_t NDim>
struct DynamicNativeTensorDescriptor_v1
{
    using Index = MultiIndex<NDim>;

    const Index lengths_;
    const Index strides_;

    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1(const Index& lengths,
                                                                            const Index& strides)
        : lengths_{lengths}, strides_{strides}
    {
    }

    // default: all-zero lengths and strides
    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1()
        : lengths_{make_zero_multi_index<NDim>()}, strides_{make_zero_multi_index<NDim>()}
    {
    }

    __host__ __device__ static constexpr index_t GetNumOfDimension() { return NDim; }

    __host__ __device__ constexpr auto GetLengths() const { return lengths_; }

    __host__ __device__ constexpr auto GetStrides() const { return strides_; }

    template <index_t IDim>
    __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
    {
        return lengths_[Number<IDim>{}];
    }

    template <index_t IDim>
    __host__ __device__ constexpr index_t GetStride(Number<IDim>) const
    {
        return strides_[Number<IDim>{}];
    }

    // number of elements = product of lengths
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
    }

    // memory footprint in elements: 1 + sum_i (length_i - 1) * stride_i
    __host__ __device__ constexpr index_t GetElementSpace() const
    {
        index_t space = 1;

        static_for<0, NDim, 1>{}(
            [&](auto i) { space += (GetLength(i) - 1) * GetStride(i); });

        return space;
    }

    // linear offset of a multi-index
    template <typename Idx>
    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
    {
        index_t offset = 0;

        static_for<0, NDim, 1>{}([&](auto i) { offset += idx[i] * GetStride(i); });

        return offset;
    }

    // offsets are linear in the index, so a diff maps the same way
    template <typename IdxDiff>
    __host__ __device__ constexpr index_t CalculateOffsetDiff(const IdxDiff& idx_diff) const
    {
        return CalculateOffset(idx_diff);
    }

    // true iff every component is inside [0, length)
    template <typename Idx>
    __host__ __device__ constexpr bool IsUpperIndexValid(const Idx& idx) const
    {
        bool flag = true;

        static_for<0, NDim, 1>{}(
            [&](auto i) { flag = flag && idx[i] >= 0 && idx[i] < GetLength(i); });

        return flag;
    }
};
template
<
typename
LowTensorDescriptor
,
// DynamicNativeTensorDescriptor_v1 or
// DynamicTransformedTensorDescriptor_v1
typename
Transforms
,
// Tuple<MultIndexTransforms...>
typename
LowDimensionIds
,
// Tuple<Sequence<...>>
typename
UpDimensionIds
>
// Tuple<Sequence<...>>
struct
DynamicTransformedTensorDescriptor_v1
{
using
LowerDesc
=
LowTensorDescriptor
;
using
UpperDesc
=
DynamicTransformedTensorDescriptor_v1
;
static
constexpr
index_t
NTransform
=
Transforms
::
Size
();
const
LowerDesc
low_tensor_desc_
;
const
Transforms
transforms_
;
__host__
__device__
static
constexpr
index_t
GetNumOfLowerDimension
()
{
return
LowerDesc
::
GetNumOfDimension
();
}
__host__
__device__
static
constexpr
index_t
GetNumOfUpperDimension
()
{
index_t
ndim_up
=
0
;
static_for
<
0
,
NTransform
,
1
>
{}([
&
](
auto
i
)
constexpr
{
constexpr
auto
tmp
=
UpDimensionIds
{}.
At
(
i
);
ndim_up
+=
decltype
(
tmp
)
::
Size
();
});
return
ndim_up
;
}
static
constexpr
index_t
NDimUp
=
GetNumOfUpperDimension
();
static
constexpr
index_t
NDimLow
=
GetNumOfLowerDimension
();
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
LowerIndex
=
MultiIndex
<
NDimLow
>
;
struct
lambda_merge_sequences
{
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
{
return
merge_sequences
(
xs
...);
}
};
struct
lambda_merge_arrays
{
template
<
typename
...
Xs
>
__host__
__device__
constexpr
auto
operator
()(
Xs
...
xs
)
const
{
return
container_cat
(
xs
...);
}
};
__host__
__device__
explicit
constexpr
DynamicTransformedTensorDescriptor_v1
(
const
LowerDesc
&
low_tensor_desc
,
const
Transforms
&
transforms
)
:
low_tensor_desc_
{
low_tensor_desc
},
transforms_
{
transforms
}
{
static_assert
(
NTransform
==
Transforms
::
Size
()
&&
NTransform
==
LowDimensionIds
::
Size
()
&&
NTransform
==
UpDimensionIds
::
Size
(),
"wrong! # of transformations not the same"
);
// sanity check:
// LowDimensionIds should include all low-dimensions,
// UpDimensionIds should include all up-dimensions
using
unsorted_up_dimension_ids
=
decltype
(
unpack
(
lambda_merge_sequences
{},
UpDimensionIds
{}));
using
sorted_up_dimension_ids
=
typename
sequence_sort
<
unsorted_up_dimension_ids
,
math
::
less
<
index_t
>>::
type
;
static_assert
(
sorted_up_dimension_ids
::
Size
()
==
NDimUp
&&
is_valid_sequence_map
<
sorted_up_dimension_ids
>
{},
"wrong! UpDimensionIds is not configured correctly"
);
using
unsorted_low_dimension_ids
=
decltype
(
unpack
(
lambda_merge_sequences
{},
LowDimensionIds
{}));
using
sorted_low_dimension_ids
=
typename
sequence_sort
<
unsorted_low_dimension_ids
,
math
::
less
<
index_t
>>::
type
;
static_assert
(
sorted_low_dimension_ids
::
Size
()
==
NDimLow
&&
is_valid_sequence_map
<
sorted_low_dimension_ids
>
{},
"wrong! LowDimensionIds is not configured correctly"
);
// TODO: sanity check: while a up-dimension could be associated with
// multille
// transformation, a low-dimension should be associated with only one
// transformation
// TODO: sanity-check: GetLowerLengths of each transform should be
// consistent with lengths
// of lower-tensor-descriptor
}
__host__
__device__
explicit
constexpr
DynamicTransformedTensorDescriptor_v1
()
:
low_tensor_desc_
{},
transforms_
{}
{
}
__host__
__device__
static
constexpr
index_t
GetNumOfDimension
()
{
return
GetNumOfUpperDimension
();
}
__host__
__device__
constexpr
auto
GetUpperLengths
()
const
{
// sort upper-dimension-ids
constexpr
auto
unsorted_up_dimension_ids
=
unpack
(
lambda_merge_sequences
{},
UpDimensionIds
{});
using
sort_up_dimension_ids
=
sequence_unique_sort
<
decltype
(
unsorted_up_dimension_ids
),
math
::
less
<
index_t
>
,
math
::
equal
<
index_t
>>
;
constexpr
auto
sorted2unsorted_map
=
typename
sort_up_dimension_ids
::
sorted2unsorted_map
{};
// sort upper-lengths
const
auto
tuple_of_up_lengths
=
transform_tuples
([](
const
auto
&
tran
)
constexpr
{
return
tran
.
GetUpperLengths
();
},
transforms_
);
const
auto
unsorted_up_lengths
=
unpack
(
lambda_merge_arrays
{},
tuple_of_up_lengths
);
const
auto
sorted_up_lengths
=
container_reorder_given_new2old
(
unsorted_up_lengths
,
sorted2unsorted_map
);
return
sorted_up_lengths
;
}
__host__
__device__
constexpr
auto
GetLengths
()
const
{
return
GetUpperLengths
();
}
template
<
index_t
IDim
>
__host__
__device__
constexpr
index_t
GetLength
(
Number
<
IDim
>
)
const
{
return
GetLengths
()[
Number
<
IDim
>
{}];
}
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
return
container_reduce
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
__host__
__device__
constexpr
index_t
GetElementSpace
()
const
{
return
low_tensor_desc_
.
GetElementSpace
();
}
__host__
__device__
constexpr
auto
GetLowerTensorDescriptor
()
const
{
return
low_tensor_desc_
;
}
template
<
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
{
static_for
<
0
,
NTransform
,
1
>
{}([
&
](
auto
itran
)
constexpr
{
const
auto
tran
=
transforms_
.
At
(
itran
);
const
auto
idx_up_part
=
pick_container_element
(
idx_up
,
UpDimensionIds
{}.
At
(
itran
));
auto
idx_low_part
=
pick_container_element
(
idx_low
,
LowDimensionIds
{}.
At
(
itran
));
tran
.
CalculateLowerIndex
(
idx_low_part
,
idx_up_part
);
});
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_low_diff
,
const
UpIdxDiff
&
idx_up_diff
,
const
LowIdx
&
idx_low_old
,
const
UpIdx
&
idx_up_old
)
const
{
static_for
<
0
,
NTransform
,
1
>
{}([
&
](
auto
itran
)
{
const
auto
tran
=
transforms_
.
At
(
itran
);
const
auto
idx_up_diff_part
=
pick_container_element
(
idx_up_diff
,
UpDimensionIds
{}.
At
(
itran
));
const
auto
idx_up_old_part
=
pick_container_element
(
idx_up_old
,
UpDimensionIds
{}.
At
(
itran
));
const
auto
idx_low_old_part
=
pick_container_element
(
idx_low_old
,
LowDimensionIds
{}.
At
(
itran
));
auto
idx_low_diff_part
=
pick_container_element
(
idx_low_diff
,
LowDimensionIds
{}.
At
(
itran
));
tran
.
CalculateLowerIndexDiff
(
idx_low_diff_part
,
idx_up_diff_part
,
idx_low_old_part
,
idx_up_old_part
);
});
}
// Convenience overload: returns the lower index by value instead of writing
// into an out-parameter. Delegates to the in-place overload.
template <typename UpIdx>
__host__ __device__ constexpr auto CalculateLowerIndex(const UpIdx& idx_up) const
{
    LowerIndex idx_low;

    CalculateLowerIndex(idx_low, idx_up);

    return idx_low;
}
// Convenience overload: returns the lower-index difference by value.
// Delegates to the in-place overload.
template <typename UpIdxDiff, typename LowIdx, typename UpIdx>
__host__ __device__ constexpr auto CalculateLowerIndexDiff(const UpIdxDiff& idx_up_diff,
                                                           const LowIdx& idx_low_old,
                                                           const UpIdx& idx_up_old) const
{
    LowerIndex idx_low_diff;

    CalculateLowerIndexDiff(idx_low_diff, idx_up_diff, idx_low_old, idx_up_old);

    return idx_low_diff;
}
// Linear memory offset for an upper index: map upper -> lower through all
// transforms, then let the lower descriptor turn the lower index into an offset.
__host__ __device__ constexpr index_t CalculateOffset(const UpperIndex& idx_up) const
{
    return low_tensor_desc_.CalculateOffset(CalculateLowerIndex(idx_up));
}
// True iff every component of idx_up lies in [0, length) of its dimension.
__host__ __device__ constexpr bool IsUpperIndexValid(const UpperIndex& idx_up) const
{
    bool flag = true;

    static_for<0, NDimUp, 1>{}(
        [&](auto i) { flag = flag && idx_up[i] >= 0 && idx_up[i] < GetLength(i); });

    return flag;
}
// True iff an (already valid) upper index maps to a valid lower index through
// every transformation. Transforms that statically guarantee a valid mapping
// (IsValidUpperIndexAlwaysMappedToValidLowerIndex() == true, e.g. pure
// reindexings) are skipped entirely at compile time via `if constexpr`.
//
// Fix: use the stored `transforms_` member instead of a default-constructed
// `Transforms{}`. These transforms are dynamic and may carry runtime state
// (e.g. lengths/pad sizes); a default-constructed tuple would discard it and
// could yield wrong validity results. Every sibling method (e.g.
// CalculateLowerIndex) already uses `transforms_`.
__host__ __device__ constexpr bool
IsValidUpperIndexMappedToValidLowerIndex(const UpperIndex& idx_up) const
{
    bool flag = true;

    static_for<0, NTransform, 1>{}([&](auto itran) {
        const auto tran = transforms_.At(itran);

        // only check a transformation if it does not always have a valid mapping
        constexpr bool is_valid_up_always_mapped_to_valid_low =
            decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex();

        if constexpr(!is_valid_up_always_mapped_to_valid_low)
        {
            const auto up_dims_part = UpDimensionIds{}.At(itran);
            const auto idx_up_part  = pick_container_element(idx_up, up_dims_part);

            flag = flag && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up_part);
        }
    });

    return flag;
}
};
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
deleted
100644 → 0
View file @
b6bfde53
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_V2_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace
ck
{
// ---- forward declarations -------------------------------------------------
// Coordinate into a tensor described by DynamicTensorDescriptor_v2: holds the
// full "hidden" multi-index plus a view of its visible components.
template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicTensorCoordinate_v2;

// Precomputed step: a visible index diff plus per-transform "needs update" flags.
template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep_v2;

// Build a coordinate (full hidden index) from a visible index.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc, const VisibleIndex& idx_visible);

// Build a step object for a fixed visible index diff (descriptor used for types only).
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& idx_diff_visible);

// Advance a coordinate by a precomputed step, updating only affected transforms.
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
                                                           TensorCoord& coord,
                                                           const TensorCoordStep& coord_step);

// Validity checks on a coordinate's offset.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(
    const TensorDesc& tensor_desc, const TensorCoord& coord);

template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord);
// Transforms: Tuple<transforms...>
// LowerDimensionIdss : Tuple<Sequence<...>, ...>
// UpperDimensionIdss : Tuple<Sequence<...>, ...>
// VisibleDimensionIds> : Sequence<...>
// Tensor descriptor expressed as a chain of index transformations.
//
//   Transforms           : Tuple<transforms...> — the transformation chain
//   LowerDimensionIdss   : Tuple<Sequence<...>, ...> — per-transform lower
//                          (input) hidden-dimension ids
//   UpperDimensionIdss   : Tuple<Sequence<...>, ...> — per-transform upper
//                          (output) hidden-dimension ids
//   VisibleDimensionIds  : Sequence<...> — which hidden dimensions form the
//                          user-facing index
//
// "Hidden" dimensions are the union of all dimensions appearing in any
// transform; hidden dimension 0 is, by convention, the linear memory offset.
template <typename Transforms,
          typename LowerDimensionIdss,
          typename UpperDimensionIdss,
          typename VisibleDimensionIds>
struct DynamicTensorDescriptor_v2
{
    // private:
    __host__ __device__ static constexpr index_t GetNumOfTransform() { return Transforms::Size(); }

    __host__ __device__ static constexpr index_t GetNumOfVisibleDimension()
    {
        return VisibleDimensionIds::Size();
    }

    // Count of distinct hidden dimensions: flatten all lower and upper id
    // sequences, concatenate, then unique-sort and take the size.
    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
        constexpr auto all_low_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});

        constexpr auto all_up_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});

        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);

        using unique_sort_all_dim_ids = typename sequence_unique_sort<decltype(all_dim_ids),
                                                                      math::less<index_t>,
                                                                      math::equal<index_t>>::type;

        return unique_sort_all_dim_ids::Size();
    }

    constexpr static index_t ntransform_   = GetNumOfTransform();
    constexpr static index_t ndim_visible_ = GetNumOfVisibleDimension();
    constexpr static index_t ndim_hidden_  = GetNumOfHiddenDimension();

    using VisibleIndex = MultiIndex<ndim_visible_>;
    using HiddenIndex  = MultiIndex<ndim_hidden_>;

    using Coordinate     = DynamicTensorCoordinate_v2<ndim_hidden_, VisibleDimensionIds>;
    using CoordinateStep = DynamicTensorCoordinateStep_v2<ntransform_, ndim_visible_>;

    // public:
    // Note: hidden_lengths_ is initialized from transforms_ (declared first),
    // so member declaration order matters here.
    __host__ __device__ explicit constexpr DynamicTensorDescriptor_v2(const Transforms& transforms,
                                                                      index_t element_space_size)
        : transforms_{transforms},
          hidden_lengths_{InitializeHiddenLengths(transforms_, element_space_size)},
          visible_lengths_{hidden_lengths_}
    {
        static_assert(Transforms::Size() == ntransform_ &&
                          LowerDimensionIdss::Size() == ntransform_ &&
                          UpperDimensionIdss::Size() == ntransform_,
                      "wrong! inconsistent # of transformations");

        // TODO check dependency of dimensions is valid
    }

    __host__ __device__ explicit constexpr DynamicTensorDescriptor_v2()
        : DynamicTensorDescriptor_v2(Transforms{}, index_t{0})
    {
    }

    __host__ __device__ static constexpr index_t GetNumOfDimension()
    {
        return GetNumOfVisibleDimension();
    }

    template <index_t IDim>
    __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
    {
        return visible_lengths_[Number<IDim>{}];
    }

    __host__ __device__ constexpr const auto& GetLengths() const { return visible_lengths_; }

    // maybe this result should be saved as a member variable
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
    }

    // Hidden dimension 0 holds the original element space size by convention
    // (see InitializeHiddenLengths).
    __host__ __device__ constexpr index_t GetElementSpaceSize() const
    {
        return hidden_lengths_[Number<0>{}];
    }

    // Map a visible index to a linear offset by running all transforms from
    // last to first, filling in the hidden index; the offset is hidden dim 0.
    template <typename Idx>
    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
    {
        static_assert(Idx::Size() == GetNumOfDimension(), "wrong! inconsistent # of dimension");

#if 0 // debug
        return make_dynamic_tensor_coordinate_v2(*this, idx).GetOffset();
#else
        constexpr index_t ntransform   = GetNumOfTransform();
        constexpr index_t ndim_hidden  = GetNumOfHiddenDimension();
        // NOTE(review): ndim_visible appears unused below — candidate for removal
        constexpr index_t ndim_visible = GetNumOfVisibleDimension();

        constexpr auto visible_dim_ids = GetVisibleDimensionIds();

        MultiIndex<ndim_hidden> idx_hidden;

        // initialize visible index
        auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);

        idx_hidden_pick_visible = idx;

        // calculate hidden index: iterate transforms in reverse (upper -> lower)
        static_for<ntransform - 1, -1, -1>{}([this, &idx_hidden](auto itran) {
            const auto& tran        = this->GetTransforms().At(itran);
            constexpr auto dims_low = GetLowerDimensionIdss().At(itran);
            constexpr auto dims_up  = GetUpperDimensionIdss().At(itran);

            const auto idx_up = pick_container_element(idx_hidden, dims_up);
            auto idx_low      = pick_container_element(idx_hidden, dims_low);

            tran.CalculateLowerIndex(idx_low, idx_up);
        });

        return idx_hidden[Number<0>{}];
#endif
    }

    // private:
    __host__ __device__ constexpr const auto& GetTransforms() const { return transforms_; }

    __host__ __device__ static constexpr auto GetLowerDimensionIdss()
    {
        return LowerDimensionIdss{};
    }

    __host__ __device__ static constexpr auto GetUpperDimensionIdss()
    {
        return UpperDimensionIdss{};
    }

    __host__ __device__ static constexpr auto GetVisibleDimensionIds()
    {
        return VisibleDimensionIds{};
    }

    // Build the hidden-length array: slot 0 holds the element space size, and
    // each transform writes the lengths of its upper dimensions into its slots.
    __host__ __device__ static constexpr auto InitializeHiddenLengths(const Transforms& transforms,
                                                                      index_t element_space_size)
    {
        // zero initialization
        HiddenIndex hidden_lengths = make_zero_multi_index<ndim_hidden_>();

        // this is the original tensor element space size
        hidden_lengths(Number<0>{}) = element_space_size;

        // lengths for all other hidden dimensions
        static_for<0, ntransform_, 1>{}([&transforms, &hidden_lengths](auto itran) {
            const auto& tran = transforms.At(itran);

            constexpr auto up_dim_ids = UpperDimensionIdss{}.At(itran);

            // hidden_lengths_pick_up contains a reference to hidden_lengths
            auto hidden_lengths_pick_up = pick_container_element(hidden_lengths, up_dim_ids);

            hidden_lengths_pick_up = tran.GetUpperLengths();
        });

        return hidden_lengths;
    }

    // private member variables
    const Transforms transforms_;
    // TODO maybe hidden_lengths_ should use reference_wrapper (reference to transforms_'s member
    // variable lengths_) to save space on stack?
    const HiddenIndex hidden_lengths_;
    // visible_lengths_ contains a reference to hidden_lengths_
    const ContainerElementPicker<const HiddenIndex, VisibleDimensionIds> visible_lengths_;

#if 0
    // friend class
    friend Coordinate;
    friend CoordinateStep;

    // friend function to transform tensor descriptor
    template <typename OldTensorDescriptor,
              typename NewTransforms,
              typename NewLowerDimensionOldVisibleIdss,
              typename NewUpperDimensionNewVisibleIdss>
    __host__ __device__ friend constexpr auto
    transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& /* old_tensor_desc */,
                                           const NewTransforms& /* new_transforms */,
                                           NewLowerDimensionOldVisibleIdss,
                                           NewUpperDimensionNewVisibleIdss);

    // friend functions for making and moving tensor coordinate
    template <typename VisibleIndex>
    __host__ __device__ friend constexpr Coordinate
    make_dynamic_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
                                      const VisibleIndex& /* idx_visible */);

    template <typename VisibleIndex>
    __host__ __device__ friend constexpr CoordinateStep
    make_dynamic_tensor_coordinate_step_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
                                           const VisibleIndex& /* idx_diff_visible */);

    __host__ __device__ friend void
    move_dynamic_tensor_coordinate_v2(const DynamicTensorDescriptor_v2& /* tensor_desc */,
                                      Coordinate& /* coord */,
                                      const CoordinateStep& /* coord_step */);

    // friend functions for valid offset check
    __host__ __device__ friend constexpr bool
    coordinate_has_valid_offset_assuming_visible_index_is_valid(
        const DynamicTensorDescriptor_v2& tensor_desc, const Coordinate& coord);

    __host__ __device__ friend constexpr bool
    coordinate_has_valid_offset(const DynamicTensorDescriptor_v2& tensor_desc,
                                const Coordinate& coord);
#endif
};
// A position inside a DynamicTensorDescriptor_v2's index space: the full
// hidden multi-index, plus a picker view exposing only the visible components.
// The linear offset is hidden component 0.
template <index_t NDimHidden, typename VisibleDimensionIds>
struct DynamicTensorCoordinate_v2
{
    // private:
    static constexpr index_t ndim_visible_ = VisibleDimensionIds::Size();

    using HiddenIndex  = MultiIndex<NDimHidden>;
    using VisibleIndex = MultiIndex<ndim_visible_>;

    // public:
    // idx_visible_ is a view into idx_hidden_, so it is constructed from the
    // member (not the argument) after idx_hidden_ is copied in.
    __host__ __device__ explicit constexpr DynamicTensorCoordinate_v2(const HiddenIndex& idx_hidden)
        : idx_hidden_{idx_hidden}, idx_visible_{idx_hidden_}
    {
    }

    // User-facing index == the visible components.
    __host__ __device__ constexpr const auto& GetIndex() const { return GetVisibleIndex(); }

    // Hidden dimension 0 is the linear memory offset by convention.
    __host__ __device__ constexpr index_t GetOffset() const { return idx_hidden_[Number<0>{}]; }

    // private:
    __host__ __device__ constexpr const auto& GetHiddenIndex() const { return idx_hidden_; }

    __host__ __device__ auto& GetHiddenIndex() { return idx_hidden_; }

    __host__ __device__ constexpr const auto& GetVisibleIndex() const { return idx_visible_; }

    __host__ __device__ auto& GetVisibleIndex() { return idx_visible_; }

    // private member variables
    HiddenIndex idx_hidden_;

    // idx_visible_ contains a reference to idx_hidden_
    ContainerElementPicker<HiddenIndex, VisibleDimensionIds> idx_visible_;

#if 0
    // friend functions for making and updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinate_v2
    make_dynamic_tensor_coordinate_v2(const TensorDesc& /* tensor_desc */,
                                      const VisibleIndex& /* idx_visible */);

    template <typename TensorDesc, typename TensorCoordStep>
    __host__ __device__ friend void move_dynamic_tensor_coordinate_v2(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate_v2& /* coord */,
        const TensorCoordStep& /* coord_step */);
#endif
};
// A precomputed coordinate step: the visible index diff to apply, plus one
// flag per transform telling move_dynamic_tensor_coordinate_v2 whether that
// transform is affected by the diff (so unaffected transforms can be skipped).
template <index_t NTransform, index_t NDimVisible>
struct DynamicTensorCoordinateStep_v2
{
    // private:
    using VisibleIndex = MultiIndex<NDimVisible>;

    // public:
    __host__ __device__ explicit constexpr DynamicTensorCoordinateStep_v2(
        const VisibleIndex& idx_diff_visible, const Array<bool, NTransform>& do_transforms)
        : idx_diff_visible_{idx_diff_visible}, do_transforms_{do_transforms}
    {
    }

    __host__ __device__ constexpr const auto& GetIndexDiff() const
    {
        return GetVisibleIndexDiff();
    }

    // private:
    __host__ __device__ constexpr const auto& GetVisibleIndexDiff() const
    {
        return idx_diff_visible_;
    }

    // private:
    const VisibleIndex idx_diff_visible_;

    // per-transform "this transform must be recomputed" flags
    const Array<bool, NTransform> do_transforms_;

#if 0
    // friend functions for updating tensor coordinate
    template <typename TensorDesc>
    __host__ __device__ friend constexpr DynamicTensorCoordinateStep_v2
    make_dynamic_tensor_coordinate_step_v2(const TensorDesc& /* tensor_desc */,
                                           const VisibleIndex& /* idx_visible */);

    template <typename TensorDesc, index_t NDimHidden, typename VisibleDimensionIds>
    __host__ __device__ friend void move_dynamic_tensor_coordinate_v2(
        const TensorDesc& /* tensor_desc */,
        DynamicTensorCoordinate_v2<NDimHidden, VisibleDimensionIds>& /* coord */,
        const DynamicTensorCoordinateStep_v2& /* coord_step */);
#endif
};
// TODO: How to fix this? It uses an struct instead of lambda because lambda
// doesn't have constructor, and to put it outside the scope where it is used
// (transform_dynamic_tensor_descriptor_v2) because template cannot be defined inside a function
// template
// Functor used with generate_sequence: for transform index I, yields the
// number of upper dimensions of the I-th transform in NewTransforms, as a
// compile-time Number<>. (A struct rather than a lambda — see the note above.)
template <typename NewTransforms>
struct lambda_get_up_dim_num
{
    template <typename I>
    __host__ __device__ constexpr auto operator()(I) const
    {
        // type of the I-th transform in the tuple
        using TransformType = remove_reference_t<decltype(NewTransforms{}.At(I{}))>;

        constexpr index_t num_up_dims = TransformType::GetNumOfUpperDimension();

        return Number<num_up_dims>{};
    }
};
// Compose new transformations onto an existing descriptor, producing a new
// DynamicTensorDescriptor_v2.
//
// The new transforms' lower dimensions are given as indices into the OLD
// descriptor's visible dimensions; they are remapped to hidden ids. The new
// transforms' upper dimensions get freshly allocated hidden ids appended after
// the old descriptor's hidden dimensions, then reordered into the caller's
// requested new-visible order.
template <typename OldTensorDescriptor,
          typename NewTransforms,
          typename NewLowerDimensionOldVisibleIdss,
          typename NewUpperDimensionNewVisibleIdss>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor_v2(const OldTensorDescriptor& old_tensor_desc,
                                       const NewTransforms& new_transforms,
                                       NewLowerDimensionOldVisibleIdss,
                                       NewUpperDimensionNewVisibleIdss)
{
    // lower dimension's hidden idss
    // convert lower dimension visible idss (tuple of sequences) to hidden idss (tuple of
    // sequences)
    constexpr auto low_dim_hidden_idss = transform_tuples(
        // convert lower dimension visible ids (a sequence) to hidden ids (a sequence)
        [](auto low_dim_visible_ids) constexpr {
            return transform_sequences(
                // convert lower dimension visible id to hidden id
                [](auto low_dim_visible_id) constexpr {
                    return OldTensorDescriptor::GetVisibleDimensionIds()[low_dim_visible_id];
                },
                low_dim_visible_ids);
        },
        NewLowerDimensionOldVisibleIdss{});

    constexpr index_t num_new_transform = NewTransforms::Size();

    // upper dimension's hidden idss: allocate fresh ids starting right after
    // the old descriptor's hidden dimensions, one contiguous range per new
    // transform, sized by its upper-dimension count (prefix-scan gives offsets).
    constexpr index_t old_hidden_dim_number = OldTensorDescriptor::GetNumOfHiddenDimension();

    constexpr auto up_dim_numbers =
        generate_sequence(lambda_get_up_dim_num<NewTransforms>{}, Number<num_new_transform>{});

    constexpr auto up_dim_numbers_scan = merge_sequences(
        Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));

    constexpr auto up_dim_hidden_idss = generate_tuple(
        [old_hidden_dim_number, up_dim_numbers_scan](auto i) constexpr {
            return
                typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
                                                 old_hidden_dim_number + up_dim_numbers_scan[i + 1],
                                                 1>::type{};
        },
        Number<num_new_transform>{});

    // new visible dimension's hidden ids
    constexpr auto unordered_new_visible_dim_hidden_ids =
        unpack([](auto... xs) { return merge_sequences(xs...); }, up_dim_hidden_idss);

    constexpr auto new_visible_dim_unordered2ordered = unpack(
        [](auto... xs) { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});

    constexpr auto new_visible_dim_hidden_ids =
        unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);

    // put everything together: old transform chain followed by the new one
    const auto all_transforms = container_cat(old_tensor_desc.GetTransforms(), new_transforms);

    constexpr auto all_low_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetLowerDimensionIdss(), low_dim_hidden_idss);

    constexpr auto all_up_dim_hidden_idss =
        container_cat(OldTensorDescriptor::GetUpperDimensionIdss(), up_dim_hidden_idss);

    return DynamicTensorDescriptor_v2<decltype(all_transforms),
                                      decltype(all_low_dim_hidden_idss),
                                      decltype(all_up_dim_hidden_idss),
                                      decltype(new_visible_dim_hidden_ids)>{
        all_transforms, old_tensor_desc.GetElementSpaceSize()};
}
// Build a coordinate for tensor_desc from a visible index: copy the visible
// components into the hidden multi-index, then run every transform from last
// to first to fill in all remaining hidden components (down to the offset in
// hidden dimension 0).
//
// Fix: removed the unused local `ndim_visible` (declared but never read).
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto make_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
                                                                     const VisibleIndex& idx_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform  = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();

    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    MultiIndex<ndim_hidden> idx_hidden;

    // initialize visible index (picker view writes through into idx_hidden)
    auto idx_hidden_pick_visible = pick_container_element(idx_hidden, visible_dim_ids);

    idx_hidden_pick_visible = idx_visible;

    // calculate hidden index: iterate transforms in reverse order
    // (static_for cannot start below 0, hence the itran_p1 - 1 indirection)
    static_for<ntransform, 0, -1>{}([&tensor_desc, &idx_hidden](auto itran_p1) {
        auto itran = itran_p1 - Number<1>{};

        const auto& tran        = tensor_desc.GetTransforms().At(itran);
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto idx_up = pick_container_element(idx_hidden, dims_up);
        auto idx_low      = pick_container_element(idx_hidden, dims_low);

        tran.CalculateLowerIndex(idx_low, idx_up);
    });

    // better to use std::move?
    return DynamicTensorCoordinate_v2<ndim_hidden, decltype(visible_dim_ids)>{idx_hidden};
}
// Build a step object for a fixed visible index diff. The descriptor argument
// is used only for its type (hence unnamed).
//
// Propagates "this dimension's diff is non-zero" flags from the visible
// dimensions down through the transform chain; a transform whose upper
// dimensions are all zero-diff can be skipped when the step is applied, and
// its flag is recorded in do_transforms.
template <typename TensorDesc, typename VisibleIndex>
__host__ __device__ constexpr auto
make_dynamic_tensor_coordinate_step_v2(const TensorDesc&, const VisibleIndex& idx_diff_visible)
{
    static_assert(TensorDesc::GetNumOfDimension() == VisibleIndex::Size(),
                  "wrong! # of dimension inconsistent");

    constexpr index_t ntransform   = TensorDesc::GetNumOfTransform();
    constexpr index_t ndim_hidden  = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ndim_visible = TensorDesc::GetNumOfVisibleDimension();

    constexpr auto visible_dim_ids = TensorDesc::GetVisibleDimensionIds();

    Array<bool, ntransform> do_transforms{false};

    // per-hidden-dimension flag: does this dimension's diff become non-zero?
    Array<bool, ndim_hidden> non_zero_diff{false};

    auto non_zero_diff_pick_visible = pick_container_element(non_zero_diff, visible_dim_ids);

    // seed the flags from the visible diff
    static_for<0, ndim_visible, 1>{}([&non_zero_diff_pick_visible, &idx_diff_visible](auto i) {
        non_zero_diff_pick_visible(i) = (idx_diff_visible[i] != 0);
    });

    // propagate flags through the transforms, last to first
    static_for<ntransform - 1, -1, -1>{}([&do_transforms, &non_zero_diff](auto itran) {
        constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
        constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

        const auto non_zero_diff_pick_up = pick_container_element(non_zero_diff, dims_up);
        auto non_zero_diff_pick_low      = pick_container_element(non_zero_diff, dims_low);

        // if any of upper index diff components is non-zero, then
        // 1) Need to do this transform
        // 2) all components of lower index diff will assume to be non-zero and need to be
        // computed
        const bool idx_diff_up_has_non_zero =
            container_reduce(non_zero_diff_pick_up, [](auto a, auto b) { return a or b; }, false);

        do_transforms(itran) = idx_diff_up_has_non_zero;

        static_for<0, dims_low.Size(), 1>{}(
            [&non_zero_diff_pick_low, &idx_diff_up_has_non_zero](auto i) {
                non_zero_diff_pick_low(i) = idx_diff_up_has_non_zero;
            });
    });

    return DynamicTensorCoordinateStep_v2<ntransform, ndim_visible>{idx_diff_visible,
                                                                    do_transforms};
}
// Advance coord by coord_step: add the visible diff to the coordinate's
// visible components, then, for each transform flagged in
// coord_step.do_transforms_ (last to first), compute the lower-index diff and
// apply it — transforms unaffected by the step are skipped entirely.
//
// Fix: removed the unused local `ndim_visible` and the unused type alias
// `HiddenIndex` (neither was referenced in the body).
template <typename TensorDesc, typename TensorCoord, typename TensorCoordStep>
__host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& tensor_desc,
                                                           TensorCoord& coord,
                                                           const TensorCoordStep& coord_step)
{
    constexpr index_t ndim_hidden = TensorDesc::GetNumOfHiddenDimension();
    constexpr index_t ntransform  = TensorDesc::GetNumOfTransform();

    // this is what needs to be calculated
    auto idx_diff_hidden = make_zero_multi_index<ndim_hidden>();

    // initialize visible index diff
    // idx_diff_hidden_pick_visible contains reference to idx_diff_hidden
    auto idx_diff_hidden_pick_visible =
        pick_container_element(idx_diff_hidden, TensorDesc::GetVisibleDimensionIds());

    idx_diff_hidden_pick_visible = coord_step.GetVisibleIndexDiff();

    // this is what needs to be updated
    auto& idx_hidden = coord.GetHiddenIndex();

    // update visible index
    auto idx_hidden_pick_visible =
        pick_container_element(idx_hidden, TensorDesc::GetVisibleDimensionIds());

    idx_hidden_pick_visible += coord_step.GetIndexDiff();

    // update rest of hidden index
    static_for<ntransform - 1, -1, -1>{}([&](auto itran) {
        if(coord_step.do_transforms_[itran])
        {
            const auto& tran        = tensor_desc.GetTransforms().At(itran);
            constexpr auto dims_low = TensorDesc::GetLowerDimensionIdss().At(itran);
            constexpr auto dims_up  = TensorDesc::GetUpperDimensionIdss().At(itran);

            // this const is for ContainerElementPicker, Array itself may not be const
            const auto idx_up = pick_container_element(idx_hidden, dims_up);
            auto idx_low      = pick_container_element(idx_hidden, dims_low);

            const auto idx_diff_up = pick_container_element(idx_diff_hidden, dims_up);
            auto idx_diff_low      = pick_container_element(idx_diff_hidden, dims_low);

            tran.CalculateLowerIndexDiff(idx_diff_low, idx_diff_up, idx_low, idx_up);

            // update idx_low
            idx_low += idx_diff_low;
        }
    });
}
// Check that a coordinate's offset is valid, assuming its visible index has
// already been validated: walk the transforms last to first and, for any
// transform that does not statically guarantee valid mappings, verify its
// slice of the hidden index maps to a valid lower index.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset_assuming_visible_index_is_valid(
    const TensorDesc& tensor_desc, const TensorCoord& coord)
{
    bool valid = true;

    constexpr index_t ntransform = TensorDesc::GetNumOfTransform();

    const auto& idx_hidden = coord.GetHiddenIndex();

    static_for<ntransform - 1, -1, -1>{}([&tensor_desc, &idx_hidden, &valid](auto itran) {
        const auto tran = tensor_desc.GetTransforms().At(itran);

        // check validity, only if current transformation does not always have a valid mapping
        if constexpr(!decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex())
        {
            const auto idx_up = pick_container_element(
                idx_hidden, TensorDesc::GetUpperDimensionIdss().At(itran));

            valid = valid && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up);
        }
    });

    return valid;
}
// Full validity check: first bounds-check every visible index component
// against the descriptor's lengths, then check the hidden index through the
// transform chain.
template <typename TensorDesc, typename TensorCoord>
__host__ __device__ constexpr bool coordinate_has_valid_offset(const TensorDesc& tensor_desc,
                                                               const TensorCoord& coord)
{
    // check visible index
    const auto& idx_visible = coord.GetVisibleIndex();

    bool is_visible_index_valid = true;

    static_for<0, TensorDesc::GetNumOfDimension(), 1>{}(
        [&is_visible_index_valid, &idx_visible, &tensor_desc](auto i) {
            is_visible_index_valid =
                is_visible_index_valid &&
                (idx_visible[i] >= 0 && idx_visible[i] < tensor_desc.GetLength(i));
        });

    // check other hidden index
    return is_visible_index_valid &&
           coordinate_has_valid_offset_assuming_visible_index_is_valid(tensor_desc, coord);
}
}
// namespace ck
#endif
driver/include/device_dummy_dynamic_transform_v1.hpp
View file @
506df423
...
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v1(InDesc,
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
(
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
_v1
(
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
(
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
_v1
(
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
(
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
_v1
(
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
...
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
{
...
...
@@ -112,9 +112,9 @@ void device_dummy_dynamic_transform_v1(InDesc,
index_t
*
const
,
float
*
const
,
float
*
const
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
_v1
<
4
>
,
const
DynamicNativeTensorDescriptor
_v1
<
4
>
,
const
DynamicNativeTensorDescriptor
_v1
<
4
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
...
...
driver/include/device_dummy_dynamic_transform_v2.hpp
View file @
506df423
...
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
_v2
<
4
>
(
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
<
4
>
(
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
_v2
<
4
>
(
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
<
4
>
(
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
_v2
<
4
>
(
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
<
4
>
(
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
...
...
@@ -52,12 +52,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
// test on cpu
{
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
1
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
1
));
print_array_v2
(
"do_tansforms 0 0 1: "
,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
.
do_transforms_
);
...
...
@@ -70,19 +69,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
);
}
}
{
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
1
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
1
,
0
));
print_array_v2
(
"do_tansforms 0 1 0: "
,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
.
do_transforms_
);
...
...
@@ -95,19 +93,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
);
}
}
{
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
1
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
1
,
0
,
0
));
print_array_v2
(
"do_tansforms 1 0 0: "
,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
.
do_transforms_
);
...
...
@@ -120,7 +117,7 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment