Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
506df423
Commit
506df423
authored
Oct 02, 2020
by
Chao Liu
Browse files
refactor
parent
b6bfde53
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1008 additions
and
1014 deletions
+1008
-1014
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
+4
-4
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
...l/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
+48
-52
composable_kernel/include/tensor_description/dynamic_tensor_coordinate_v1.hpp
...clude/tensor_description/dynamic_tensor_coordinate_v1.hpp
+23
-21
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
.../include/tensor_description/dynamic_tensor_descriptor.hpp
+495
-224
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
...e/tensor_description/dynamic_tensor_descriptor_helper.hpp
+34
-17
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v1.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v1.hpp
+35
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v2.hpp
+0
-52
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v1.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v1.hpp
+341
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+0
-613
driver/include/device_dummy_dynamic_transform_v1.hpp
driver/include/device_dummy_dynamic_transform_v1.hpp
+7
-7
driver/include/device_dummy_dynamic_transform_v2.hpp
driver/include/device_dummy_dynamic_transform_v2.hpp
+21
-24
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v1.hpp
View file @
506df423
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
#define CK_DUMMY_DYNAMIC_TRANSFORM_V1_HPP
#define CK_DUMMY_DYNAMIC_TRANSFORM_V1_HPP
#include "common_header.hpp"
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor
_v1
.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
#include "dynamic_tensor_descriptor_helper
_v1
.hpp"
#include "dynamic_tensor_coordinate.hpp"
#include "dynamic_tensor_coordinate
_v1
.hpp"
namespace
ck
{
namespace
ck
{
...
@@ -565,7 +565,7 @@ struct DummyDynamicTransform_v1
...
@@ -565,7 +565,7 @@ struct DummyDynamicTransform_v1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
idx
);
make_dynamic_tensor_coordinate
_v1
(
in_gemmk_gemmn_global_desc
,
idx
);
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
...
...
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform_v2.hpp
View file @
506df423
...
@@ -2,20 +2,20 @@
...
@@ -2,20 +2,20 @@
#define CK_DUMMY_DYNAMIC_TRANSFORM_V2_HPP
#define CK_DUMMY_DYNAMIC_TRANSFORM_V2_HPP
#include "common_header.hpp"
#include "common_header.hpp"
#include "dynamic_tensor_descriptor
_v2
.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor_helper
_v2
.hpp"
#include "dynamic_tensor_descriptor_helper.hpp"
namespace
ck
{
namespace
ck
{
template
<
typename
...
Wei
,
typename
...
In
,
typename
...
Out
>
template
<
typename
...
Wei
,
typename
...
In
,
typename
...
Out
>
__host__
__device__
constexpr
auto
map_convolution_into_gemm_fwd_v4r4
(
__host__
__device__
constexpr
auto
const
DynamicTensorDescriptor
_v2
<
Wei
...
>&
wei_k_c_y_x_global_desc
,
map_convolution_into_gemm_fwd_v4r4
(
const
DynamicTensorDescriptor
<
Wei
...
>&
wei_k_c_y_x_global_desc
,
const
DynamicTensorDescriptor
_v2
<
In
...
>&
in_n_c_hi_wi_global_desc
,
const
DynamicTensorDescriptor
<
In
...
>&
in_n_c_hi_wi_global_desc
,
const
DynamicTensorDescriptor
_v2
<
Out
...
>&
out_n_k_ho_wo_global_desc
,
const
DynamicTensorDescriptor
<
Out
...
>&
out_n_k_ho_wo_global_desc
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
conv_strides
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
conv_dilations
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
in_left_pads
,
const
MultiIndex
<
2
>
in_right_pads
)
const
MultiIndex
<
2
>
in_right_pads
)
{
{
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
...
@@ -47,8 +47,8 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
...
@@ -47,8 +47,8 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
const
index_t
InRightPadW
=
in_right_pads
[
I1
];
const
index_t
InRightPadW
=
in_right_pads
[
I1
];
// input tensor
// input tensor
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
_v2
(
transform_dynamic_tensor_descriptor
(
in_n_c_hi_wi_global_desc
,
in_n_c_hi_wi_global_desc
,
make_tuple
(
DynamicPassThrough
{
N
},
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
C
},
DynamicPassThrough
{
C
},
...
@@ -66,7 +66,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
...
@@ -66,7 +66,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
const
index_t
Hip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I2
);
const
index_t
Hip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I2
);
const
index_t
Wip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I3
);
const
index_t
Wip
=
in_n_c_hip_wip_global_desc
.
GetLength
(
I3
);
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_y_ho_x_wo_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_hip_wip_global_desc
,
in_n_c_hip_wip_global_desc
,
make_tuple
(
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
N
},
...
@@ -76,7 +76,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
...
@@ -76,7 +76,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
const
auto
in_gemmktotal_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_gemmktotal_gemmn_global_desc
=
transform_dynamic_tensor_descriptor
(
in_n_c_y_ho_x_wo_global_desc
,
in_n_c_y_ho_x_wo_global_desc
,
make_tuple
(
DynamicMerge
<
3
>
{
make_multi_index
(
C
,
Y
,
X
)},
make_tuple
(
DynamicMerge
<
3
>
{
make_multi_index
(
C
,
Y
,
X
)},
DynamicMerge
<
3
>
{
make_multi_index
(
N
,
Ho
,
Wo
)}),
DynamicMerge
<
3
>
{
make_multi_index
(
N
,
Ho
,
Wo
)}),
...
@@ -89,7 +89,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
...
@@ -89,7 +89,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
constexpr
index_t
GemmKPack
=
8
;
constexpr
index_t
GemmKPack
=
8
;
const
index_t
gemmk
=
gemmktotal
/
GemmKPack
;
const
index_t
gemmk
=
gemmktotal
/
GemmKPack
;
const
auto
in_gemmk_gemmn_gemmkpack_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_gemmk_gemmn_gemmkpack_global_desc
=
transform_dynamic_tensor_descriptor
(
in_gemmktotal_gemmn_global_desc
,
in_gemmktotal_gemmn_global_desc
,
make_tuple
(
DynamicUnMerge
<
2
>
{
make_multi_index
(
gemmk
,
GemmKPack
)},
make_tuple
(
DynamicUnMerge
<
2
>
{
make_multi_index
(
gemmk
,
GemmKPack
)},
DynamicPassThrough
{
gemmn
}),
DynamicPassThrough
{
gemmn
}),
...
@@ -105,9 +105,9 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
...
@@ -105,9 +105,9 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_fwd_v4r4(
#if 0
#if 0
template <typename... Wei, typename... In, typename... Out>
template <typename... Wei, typename... In, typename... Out>
__host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
__host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
const DynamicTensorDescriptor
_v2
<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor<Wei...>& wei_k_c_y_x_global_desc,
const DynamicTensorDescriptor
_v2
<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor<In...>& in_n_c_hi_wi_global_desc,
const DynamicTensorDescriptor
_v2
<Out...>& out_n_k_ho_wo_global_desc,
const DynamicTensorDescriptor<Out...>& out_n_k_ho_wo_global_desc,
const MultiIndex<2> conv_strides,
const MultiIndex<2> conv_strides,
const MultiIndex<2> conv_dilations,
const MultiIndex<2> conv_dilations,
const MultiIndex<2> in_left_pads,
const MultiIndex<2> in_left_pads,
...
@@ -148,7 +148,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
...
@@ -148,7 +148,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
constexpr bool out_skip_out_of_bound_check = true;
constexpr bool out_skip_out_of_bound_check = true;
#endif
#endif
constexpr
auto
out_n_k_ydot_htilda_xdot_wtilda_global_desc
=
transform_tensor_descriptor
_v2
(
constexpr
auto
out_n_k_ydot_htilda_xdot_wtilda_global_desc
=
transform_tensor_descriptor
(
out_n_k_ho_wo_global_desc
,
out_n_k_ho_wo_global_desc
,
make_tuple
(
PassThrough
{
N
},
make_tuple
(
PassThrough
{
N
},
PassThrough
{
K
},
PassThrough
{
K
},
...
@@ -158,7 +158,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
...
@@ -158,7 +158,7 @@ __host__ __device__ constexpr auto map_convolution_into_gemm_bwd_v4r1(
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
,
3
>
{},
Sequence
<
4
,
5
>
{}));
constexpr
auto
out_n_k_ydot_htildaslice_xdot_wtildaslice_global_desc
=
constexpr
auto
out_n_k_ydot_htildaslice_xdot_wtildaslice_global_desc
=
transform_tensor_descriptor
_v2
(
transform_tensor_descriptor
(
out_n_k_ydot_htilda_xdot_wtilda_global_desc
,
out_n_k_ydot_htilda_xdot_wtilda_global_desc
,
make_tuple
(
PassThrough
{
N
},
make_tuple
(
PassThrough
{
N
},
PassThrough
{
K
},
PassThrough
{
K
},
...
@@ -230,15 +230,14 @@ struct DummyDynamicTransform_v2_1
...
@@ -230,15 +230,14 @@ struct DummyDynamicTransform_v2_1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
idx
);
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
const
auto
in_gemmk_gemmn_coord_step
=
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
make_dynamic_tensor_coordinate_step
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
1
,
0
));
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
move_dynamic_tensor_coordinate
_v2
(
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
in_gemmk_gemmn_global_desc
,
in_gemmk_gemmn_coord
,
in_gemmk_gemmn_coord_step
);
// write
// write
...
@@ -308,7 +307,7 @@ struct DummyDynamicTransform_v2_1
...
@@ -308,7 +307,7 @@ struct DummyDynamicTransform_v2_1
const
index_t
InRightPadW
=
in_right_pads
[
i1
];
const
index_t
InRightPadW
=
in_right_pads
[
i1
];
#if 0
#if 0
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor
_v2
(
const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
move(in_n_c_hi_wi_global_desc),
move(in_n_c_hi_wi_global_desc),
make_tuple(DynamicPassThrough{N},
make_tuple(DynamicPassThrough{N},
DynamicPassThrough{C},
DynamicPassThrough{C},
...
@@ -317,7 +316,7 @@ struct DummyDynamicTransform_v2_1
...
@@ -317,7 +316,7 @@ struct DummyDynamicTransform_v2_1
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
#elif
0
#elif
0
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
move
(
in_n_c_hi_wi_global_desc
),
move
(
in_n_c_hi_wi_global_desc
),
make_tuple
(
DynamicPassThrough
{
N
},
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
C
},
DynamicPassThrough
{
C
},
...
@@ -326,8 +325,8 @@ struct DummyDynamicTransform_v2_1
...
@@ -326,8 +325,8 @@ struct DummyDynamicTransform_v2_1
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}),
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}));
make_tuple
(
Sequence
<
0
>
{},
Sequence
<
1
>
{},
Sequence
<
2
>
{},
Sequence
<
3
>
{}));
#else
#else
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
_v2
(
const
auto
in_n_c_hip_wip_global_desc
=
transform_dynamic_tensor_descriptor
(
transform_dynamic_tensor_descriptor
_v2
(
transform_dynamic_tensor_descriptor
(
move
(
in_n_c_hi_wi_global_desc
),
move
(
in_n_c_hi_wi_global_desc
),
make_tuple
(
DynamicPassThrough
{
N
},
make_tuple
(
DynamicPassThrough
{
N
},
DynamicPassThrough
{
C
},
DynamicPassThrough
{
C
},
...
@@ -351,14 +350,14 @@ struct DummyDynamicTransform_v2_1
...
@@ -351,14 +350,14 @@ struct DummyDynamicTransform_v2_1
#if 1
#if 1
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_coord
=
make_dynamic_tensor_coordinate
_v2
(
in_n_c_hip_wip_global_desc
,
idx
);
auto
in_coord
=
make_dynamic_tensor_coordinate
(
in_n_c_hip_wip_global_desc
,
idx
);
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step
_v2
(
const
auto
in_coord_step
=
make_dynamic_tensor_coordinate_step
(
in_n_c_hip_wip_global_desc
,
make_multi_index
(
1
,
0
,
0
,
0
));
in_n_c_hip_wip_global_desc
,
make_multi_index
(
1
,
0
,
0
,
0
));
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
move_dynamic_tensor_coordinate
_v2
(
in_n_c_hip_wip_global_desc
,
in_coord
,
in_coord_step
);
move_dynamic_tensor_coordinate
(
in_n_c_hip_wip_global_desc
,
in_coord
,
in_coord_step
);
// write
// write
float
value
=
1
;
float
value
=
1
;
...
@@ -381,7 +380,7 @@ struct DummyDynamicTransform_v2_1
...
@@ -381,7 +380,7 @@ struct DummyDynamicTransform_v2_1
}
}
#else
#else
// write
// write
// auto in_coord = make_dynamic_tensor_coordinate
_v2
(in_n_c_hi_wi_global_desc, idx);
// auto in_coord = make_dynamic_tensor_coordinate(in_n_c_hi_wi_global_desc, idx);
p_out_global
[
in_n_c_hip_wip_global_desc
.
CalculateOffset
(
idx
)]
=
1
;
p_out_global
[
in_n_c_hip_wip_global_desc
.
CalculateOffset
(
idx
)]
=
1
;
#endif
#endif
...
@@ -429,26 +428,23 @@ struct DummyDynamicTransform_v2_fwd_v4r4
...
@@ -429,26 +428,23 @@ struct DummyDynamicTransform_v2_fwd_v4r4
const
index_t
niter
=
p_wei_global
[
10
];
const
index_t
niter
=
p_wei_global
[
10
];
auto
in_gemmk_gemmn_gemmkpack_coord
=
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
idx
);
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
idx
);
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
make_dynamic_tensor_coordinate_step
(
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
1
));
make_multi_index
(
0
,
0
,
1
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
make_dynamic_tensor_coordinate_step
(
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
1
,
0
));
make_multi_index
(
0
,
1
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
make_dynamic_tensor_coordinate_step
(
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
1
,
0
,
0
));
make_multi_index
(
1
,
0
,
0
));
// move (0, 0, 1)
// move (0, 0, 1)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
);
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
);
// write
// write
float
value
=
1
;
float
value
=
1
;
...
@@ -478,9 +474,9 @@ struct DummyDynamicTransform_v2_fwd_v4r4
...
@@ -478,9 +474,9 @@ struct DummyDynamicTransform_v2_fwd_v4r4
// move (0, 1, 0)
// move (0, 1, 0)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
);
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
);
// write
// write
float
value
=
1
;
float
value
=
1
;
...
@@ -510,9 +506,9 @@ struct DummyDynamicTransform_v2_fwd_v4r4
...
@@ -510,9 +506,9 @@ struct DummyDynamicTransform_v2_fwd_v4r4
// move (1, 0, 0)
// move (1, 0, 0)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
{
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
);
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
);
// write
// write
float
value
=
1
;
float
value
=
1
;
...
...
composable_kernel/include/tensor_description/dynamic_tensor_coordinate.hpp
→
composable_kernel/include/tensor_description/dynamic_tensor_coordinate
_v1
.hpp
View file @
506df423
#ifndef CK_DYNAMIC_TENSOR_COORDINATE_HPP
#ifndef CK_DYNAMIC_TENSOR_COORDINATE_
V1_
HPP
#define CK_DYNAMIC_TENSOR_COORDINATE_HPP
#define CK_DYNAMIC_TENSOR_COORDINATE_
V1_
HPP
#include "common_header.hpp"
#include "common_header.hpp"
#include "dynamic_tensor_descriptor.hpp"
#include "dynamic_tensor_descriptor
_v1
.hpp"
namespace
ck
{
namespace
ck
{
...
@@ -19,20 +19,20 @@ namespace ck {
...
@@ -19,20 +19,20 @@ namespace ck {
// 1. Given step size in each dimension, update itself, or return a new tensor coordinate, so user
// 1. Given step size in each dimension, update itself, or return a new tensor coordinate, so user
// can freely move the "point of location" inside the tensor
// can freely move the "point of location" inside the tensor
// wrapper class for DynamicNativeTensorCoordinate and DynamicTransformedTensorCoordinate
// wrapper class for DynamicNativeTensorCoordinate
_v1
and DynamicTransformedTensorCoordinate
_v1
template
<
typename
TensorDesc
>
template
<
typename
TensorDesc
>
struct
DynamicTensorCoordinate
;
struct
DynamicTensorCoordinate
_v1
;
// tensor coordinate for native tensor
// tensor coordinate for native tensor
template
<
typename
TensorDesc
>
template
<
typename
TensorDesc
>
struct
DynamicNativeTensorCoordinate
struct
DynamicNativeTensorCoordinate
_v1
{
{
using
type
=
DynamicNativeTensorCoordinate
;
using
type
=
DynamicNativeTensorCoordinate
_v1
;
using
tensor_desc_type
=
TensorDesc
;
using
tensor_desc_type
=
TensorDesc
;
static
constexpr
index_t
NDim
=
tensor_desc_type
::
GetNumOfDimension
();
static
constexpr
index_t
NDim
=
tensor_desc_type
::
GetNumOfDimension
();
using
Index
=
MultiIndex
<
NDim
>
;
using
Index
=
MultiIndex
<
NDim
>
;
__host__
__device__
explicit
constexpr
DynamicNativeTensorCoordinate
(
__host__
__device__
explicit
constexpr
DynamicNativeTensorCoordinate
_v1
(
const
tensor_desc_type
&
tensor_desc
,
const
Index
&
idx
)
const
tensor_desc_type
&
tensor_desc
,
const
Index
&
idx
)
:
tensor_desc_
{
tensor_desc
},
idx_
{
idx
},
offset_
{
tensor_desc
.
CalculateOffset
(
idx
)}
:
tensor_desc_
{
tensor_desc
},
idx_
{
idx
},
offset_
{
tensor_desc
.
CalculateOffset
(
idx
)}
{
{
...
@@ -118,17 +118,17 @@ struct DynamicNativeTensorCoordinate
...
@@ -118,17 +118,17 @@ struct DynamicNativeTensorCoordinate
// tensor coordinate for transformed tensor
// tensor coordinate for transformed tensor
template
<
typename
TensorDesc
>
template
<
typename
TensorDesc
>
struct
DynamicTransformedTensorCoordinate
struct
DynamicTransformedTensorCoordinate
_v1
{
{
static
constexpr
index_t
NDimUp
=
TensorDesc
::
GetNumOfDimension
();
static
constexpr
index_t
NDimUp
=
TensorDesc
::
GetNumOfDimension
();
using
UpperDesc
=
TensorDesc
;
using
UpperDesc
=
TensorDesc
;
using
UpperCoord
=
DynamicTransformedTensorCoordinate
;
using
UpperCoord
=
DynamicTransformedTensorCoordinate
_v1
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
UpperIndex
=
MultiIndex
<
NDimUp
>
;
using
LowerDesc
=
typename
UpperDesc
::
LowerDesc
;
using
LowerDesc
=
typename
UpperDesc
::
LowerDesc
;
using
LowerCoord
=
typename
DynamicTensorCoordinate
<
LowerDesc
>::
type
;
using
LowerCoord
=
typename
DynamicTensorCoordinate
_v1
<
LowerDesc
>::
type
;
__host__
__device__
explicit
constexpr
DynamicTransformedTensorCoordinate
(
__host__
__device__
explicit
constexpr
DynamicTransformedTensorCoordinate
_v1
(
const
UpperDesc
&
tensor_desc_up
,
const
UpperIndex
&
idx_up
)
const
UpperDesc
&
tensor_desc_up
,
const
UpperIndex
&
idx_up
)
:
tensor_desc_up_
{
tensor_desc_up
},
:
tensor_desc_up_
{
tensor_desc_up
},
idx_up_
{
idx_up
},
idx_up_
{
idx_up
},
...
@@ -240,30 +240,32 @@ struct DynamicTransformedTensorCoordinate
...
@@ -240,30 +240,32 @@ struct DynamicTransformedTensorCoordinate
template
<
index_t
NDim
>
template
<
index_t
NDim
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
make_dynamic_tensor_coordinate
(
const
DynamicNativeTensorDescriptor
<
NDim
>&
tensor_desc
,
make_dynamic_tensor_coordinate
_v1
(
const
DynamicNativeTensorDescriptor
_v1
<
NDim
>&
tensor_desc
,
const
MultiIndex
<
NDim
>&
idx
)
const
MultiIndex
<
NDim
>&
idx
)
{
{
return
DynamicNativeTensorCoordinate
<
DynamicNativeTensorDescriptor
<
NDim
>>
{
tensor_desc
,
idx
};
return
DynamicNativeTensorCoordinate_v1
<
DynamicNativeTensorDescriptor_v1
<
NDim
>>
{
tensor_desc
,
idx
};
}
}
template
<
index_t
NDim
,
typename
...
Ts
>
template
<
index_t
NDim
,
typename
...
Ts
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
make_dynamic_tensor_coordinate
(
const
DynamicTransformedTensorDescriptor
<
Ts
...
>&
tensor_desc
,
make_dynamic_tensor_coordinate
_v1
(
const
DynamicTransformedTensorDescriptor
_v1
<
Ts
...
>&
tensor_desc
,
const
MultiIndex
<
NDim
>&
idx
)
const
MultiIndex
<
NDim
>&
idx
)
{
{
static_assert
(
DynamicTransformedTensorDescriptor
<
Ts
...
>::
GetNumOfDimension
()
==
NDim
,
static_assert
(
DynamicTransformedTensorDescriptor
_v1
<
Ts
...
>::
GetNumOfDimension
()
==
NDim
,
"wrong! inconsistent # of dimensions"
);
"wrong! inconsistent # of dimensions"
);
return
DynamicTransformedTensorCoordinate
<
DynamicTransformedTensorDescriptor
<
Ts
...
>>
{
return
DynamicTransformedTensorCoordinate
_v1
<
DynamicTransformedTensorDescriptor
_v1
<
Ts
...
>>
{
tensor_desc
,
idx
};
tensor_desc
,
idx
};
}
}
template
<
typename
TensorDesc
>
template
<
typename
TensorDesc
>
struct
DynamicTensorCoordinate
struct
DynamicTensorCoordinate
_v1
{
{
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
static
constexpr
index_t
NDim
=
TensorDesc
::
GetNumOfDimension
();
using
type
=
decltype
(
make_dynamic_tensor_coordinate
<
NDim
>
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
using
type
=
decltype
(
make_dynamic_tensor_coordinate_v1
<
NDim
>
(
TensorDesc
{},
MultiIndex
<
NDim
>
{}));
};
};
}
// namespace ck
}
// namespace ck
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
View file @
506df423
This diff is collapsed.
Click to expand it.
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper.hpp
View file @
506df423
...
@@ -6,29 +6,46 @@
...
@@ -6,29 +6,46 @@
namespace
ck
{
namespace
ck
{
template
<
typename
Lengths
,
typename
Strides
>
template
<
index_t
N
>
__host__
__device__
constexpr
auto
make_dynamic_native_tensor_descriptor
(
const
Lengths
&
lengths
,
__host__
__device__
constexpr
auto
const
Strides
&
stride
s
)
make_dynamic_native_tensor_descriptor_packed
(
const
MultiIndex
<
N
>&
length
s
)
{
{
static_assert
(
Lengths
::
Size
()
==
Strides
::
Size
(),
"wrong! Size not the same"
);
return
DynamicNativeTensorDescriptor
<
Lengths
::
Size
()
>
(
lengths
,
strides
);
const
auto
transforms
=
make_tuple
(
DynamicUnMerge
<
N
>
{
lengths
});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
up_dim_hidden_idss
=
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
const
index_t
element_space_size
=
container_reduce
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
return
DynamicTensorDescriptor
<
decltype
(
transforms
),
decltype
(
low_dim_hidden_idss
),
decltype
(
up_dim_hidden_idss
),
decltype
(
visible_dim_hidden_ids
)
>
{
transforms
,
element_space_size
};
}
}
template
<
typename
LowTensorDescriptor
,
template
<
index_t
N
>
typename
Transforms
,
typename
LowDimensionIds
,
typename
UpDimensionIds
>
__host__
__device__
constexpr
auto
__host__
__device__
constexpr
auto
transform_dynamic_tensor_descriptor
(
const
LowTensorDescriptor
&
low_tensor_desc
,
make_dynamic_native_tensor_descriptor
(
const
MultiIndex
<
N
>&
lengths
,
const
MultiIndex
<
N
>&
strides
)
const
Transforms
&
transforms
,
LowDimensionIds
,
UpDimensionIds
)
{
{
return
DynamicTransformedTensorDescriptor
<
LowTensorDescriptor
,
const
auto
transforms
=
make_tuple
(
DynamicEmbed
<
N
>
{
lengths
,
strides
});
Transforms
,
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
LowDimensionIds
,
constexpr
auto
up_dim_hidden_idss
=
UpDimensionIds
>
{
low_tensor_desc
,
transforms
};
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
index_t
element_space_size
=
1
;
static_for
<
0
,
N
,
1
>
{}([
&
](
auto
i
)
{
element_space_size
+=
(
lengths
[
i
]
-
1
)
*
strides
[
i
];
});
return
DynamicTensorDescriptor
<
decltype
(
transforms
),
decltype
(
low_dim_hidden_idss
),
decltype
(
up_dim_hidden_idss
),
decltype
(
visible_dim_hidden_ids
)
>
{
transforms
,
element_space_size
};
}
}
}
// namespace ck
}
// namespace ck
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v1.hpp
0 → 100644
View file @
506df423
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V1_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V1_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v1.hpp"
namespace
ck
{
// Factory for DynamicNativeTensorDescriptor_v1.
//
// lengths / strides : containers exposing a static Size(); both must report
//                     the same number of entries (checked at compile time).
// Returns a descriptor whose rank is Lengths::Size(), constructed from the
// two arguments.
template <typename Lengths, typename Strides>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_v1(const Lengths& lengths, const Strides& strides)
{
    static_assert(Lengths::Size() == Strides::Size(), "wrong! Size not the same");

    using NativeDesc = DynamicNativeTensorDescriptor_v1<Lengths::Size()>;

    return NativeDesc(lengths, strides);
}
// Factory for DynamicTransformedTensorDescriptor_v1.
//
// low_tensor_desc : the descriptor being transformed.
// transforms      : the transforms stored in the resulting descriptor.
// The two unnamed trailing parameters carry no runtime data; only their
// types (LowDimensionIds / UpDimensionIds) are forwarded as template
// arguments of the returned descriptor.
template <typename LowTensorDescriptor,
          typename Transforms,
          typename LowDimensionIds,
          typename UpDimensionIds>
__host__ __device__ constexpr auto
transform_dynamic_tensor_descriptor_v1(const LowTensorDescriptor& low_tensor_desc,
                                       const Transforms& transforms,
                                       LowDimensionIds,
                                       UpDimensionIds)
{
    using TransformedDesc = DynamicTransformedTensorDescriptor_v1<LowTensorDescriptor,
                                                                  Transforms,
                                                                  LowDimensionIds,
                                                                  UpDimensionIds>;

    return TransformedDesc{low_tensor_desc, transforms};
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
deleted
100644 → 0
View file @
b6bfde53
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v2.hpp"
namespace
ck
{
// Builds a DynamicTensorDescriptor_v2 from lengths only (no explicit strides).
//
// The descriptor holds a single DynamicUnMerge<N> transform constructed from
// `lengths`, mapping hidden dimension 0 to hidden dimensions 1..N, which are
// also the visible dimensions. The element space size passed to the
// descriptor is the product of all lengths.
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
{
    // Sequence 1, 2, ..., N: the hidden ids of the N visible dimensions.
    using UpperHiddenIds = typename arithmetic_sequence_gen<1, N + 1, 1>::type;

    const auto unmerge_transforms = make_tuple(DynamicUnMerge<N>{lengths});

    constexpr auto lower_hidden_idss = make_tuple(Sequence<0>{});
    constexpr auto upper_hidden_idss = make_tuple(UpperHiddenIds{});
    constexpr auto visible_hidden_ids = UpperHiddenIds{};

    // Product of all lengths, starting from 1.
    const index_t space_size =
        container_reduce(lengths, math::multiplies<index_t>{}, index_t{1});

    // decltype on the locals keeps their const qualification in the
    // descriptor's template arguments, exactly as before.
    using PackedDesc = DynamicTensorDescriptor_v2<decltype(unmerge_transforms),
                                                  decltype(lower_hidden_idss),
                                                  decltype(upper_hidden_idss),
                                                  decltype(visible_hidden_ids)>;

    return PackedDesc{unmerge_transforms, space_size};
}
// Builds a DynamicTensorDescriptor_v2 from explicit lengths and strides.
//
// The descriptor holds a single DynamicEmbed<N> transform constructed from
// `lengths` and `strides`, mapping hidden dimension 0 to hidden dimensions
// 1..N, which are also the visible dimensions. The element space size passed
// to the descriptor is 1 + sum over all dimensions of (length - 1) * stride.
template <index_t N>
__host__ __device__ constexpr auto
make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths,
                                         const MultiIndex<N>& strides)
{
    // Sequence 1, 2, ..., N: the hidden ids of the N visible dimensions.
    using UpperHiddenIds = typename arithmetic_sequence_gen<1, N + 1, 1>::type;

    const auto embed_transforms = make_tuple(DynamicEmbed<N>{lengths, strides});

    constexpr auto lower_hidden_idss = make_tuple(Sequence<0>{});
    constexpr auto upper_hidden_idss = make_tuple(UpperHiddenIds{});
    constexpr auto visible_hidden_ids = UpperHiddenIds{};

    // Start at 1 and add the largest reachable step along each dimension.
    index_t space_size = 1;

    static_for<0, N, 1>{}(
        [&](auto dim) { space_size += (lengths[dim] - 1) * strides[dim]; });

    // decltype on the locals keeps their const qualification in the
    // descriptor's template arguments, exactly as before.
    using StridedDesc = DynamicTensorDescriptor_v2<decltype(embed_transforms),
                                                   decltype(lower_hidden_idss),
                                                   decltype(upper_hidden_idss),
                                                   decltype(visible_hidden_ids)>;

    return StridedDesc{embed_transforms, space_size};
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v1.hpp
0 → 100644
View file @
506df423
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_V1_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_V1_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace
ck
{
// Tensor descriptor defined directly by per-dimension lengths and strides.
// NDim is the number of dimensions; both lengths and strides are stored as
// MultiIndex<NDim> and are immutable after construction.
template <index_t NDim>
struct DynamicNativeTensorDescriptor_v1
{
    using Index = MultiIndex<NDim>;

    const Index lengths_;
    const Index strides_;

    // Stores copies of the given lengths and strides.
    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1(const Index& lengths,
                                                                            const Index& strides)
        : lengths_{lengths}, strides_{strides}
    {
    }

    // Default construction: both members come from make_zero_multi_index<NDim>().
    __host__ __device__ explicit constexpr DynamicNativeTensorDescriptor_v1()
        : lengths_{make_zero_multi_index<NDim>()}, strides_{make_zero_multi_index<NDim>()}
    {
    }

    // Number of dimensions (the NDim template argument).
    __host__ __device__ static constexpr index_t GetNumOfDimension() { return NDim; }

    // Returns the stored lengths by value.
    __host__ __device__ constexpr auto GetLengths() const { return lengths_; }

    // Returns the stored strides by value.
    __host__ __device__ constexpr auto GetStrides() const { return strides_; }

    // Length of dimension IDim; the dimension is selected by the argument's type.
    template <index_t IDim>
    __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
    {
        return lengths_[Number<IDim>{}];
    }

    // Stride of dimension IDim; the dimension is selected by the argument's type.
    template <index_t IDim>
    __host__ __device__ constexpr index_t GetStride(Number<IDim>) const
    {
        return strides_[Number<IDim>{}];
    }

    // Product of all lengths (reduction starts from 1).
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
    }

    // 1 + sum over all dimensions of (length - 1) * stride.
    __host__ __device__ constexpr index_t GetElementSpace() const
    {
        index_t element_space = 1;

        static_for<0, NDim, 1>{}(
            [&](auto dim) { element_space += (GetLength(dim) - 1) * GetStride(dim); });

        return element_space;
    }

    // Sum over all dimensions of idx[d] * stride[d].
    template <typename Idx>
    __host__ __device__ constexpr index_t CalculateOffset(const Idx& idx) const
    {
        index_t linear_offset = 0;

        static_for<0, NDim, 1>{}(
            [&](auto dim) { linear_offset += idx[dim] * GetStride(dim); });

        return linear_offset;
    }

    // Offset change for an index difference; computed with the same formula
    // as CalculateOffset.
    template <typename IdxDiff>
    __host__ __device__ constexpr index_t CalculateOffsetDiff(const IdxDiff& idx_diff) const
    {
        return CalculateOffset(idx_diff);
    }

    // True only when every component satisfies 0 <= idx[d] < length[d].
    template <typename Idx>
    __host__ __device__ constexpr bool IsUpperIndexValid(const Idx& idx) const
    {
        bool in_bounds = true;

        static_for<0, NDim, 1>{}([&](auto dim) {
            in_bounds = in_bounds && idx[dim] >= 0 && idx[dim] < GetLength(dim);
        });

        return in_bounds;
    }
};
// Tensor descriptor obtained by applying a tuple of index transforms on top
// of a lower tensor descriptor.
//
// Transform number i maps the upper dimensions listed in UpDimensionIds[i] to
// the lower dimensions listed in LowDimensionIds[i]. Both the lower descriptor
// and the transforms are stored by value as data members.
template <typename LowTensorDescriptor, // DynamicNativeTensorDescriptor_v1 or
                                        // DynamicTransformedTensorDescriptor_v1
          typename Transforms,          // Tuple<MultIndexTransforms...>
          typename LowDimensionIds,     // Tuple<Sequence<...>>
          typename UpDimensionIds>      // Tuple<Sequence<...>>
struct DynamicTransformedTensorDescriptor_v1
{
    using LowerDesc = LowTensorDescriptor;
    using UpperDesc = DynamicTransformedTensorDescriptor_v1;

    static constexpr index_t NTransform = Transforms::Size();

    const LowerDesc low_tensor_desc_;
    const Transforms transforms_;

    // Number of dimensions of the lower descriptor.
    __host__ __device__ static constexpr index_t GetNumOfLowerDimension()
    {
        return LowerDesc::GetNumOfDimension();
    }

    // Number of upper dimensions: the sum of the sizes of all sequences in
    // UpDimensionIds.
    __host__ __device__ static constexpr index_t GetNumOfUpperDimension()
    {
        index_t ndim_up = 0;

        static_for<0, NTransform, 1>{}([&](auto i) constexpr {
            constexpr auto tmp = UpDimensionIds{}.At(i);
            ndim_up += decltype(tmp)::Size();
        });

        return ndim_up;
    }

    static constexpr index_t NDimUp  = GetNumOfUpperDimension();
    static constexpr index_t NDimLow = GetNumOfLowerDimension();

    using UpperIndex = MultiIndex<NDimUp>;
    using LowerIndex = MultiIndex<NDimLow>;

    // Functor forwarding its arguments to merge_sequences; used with unpack().
    struct lambda_merge_sequences
    {
        template <typename... Xs>
        __host__ __device__ constexpr auto operator()(Xs... xs) const
        {
            return merge_sequences(xs...);
        }
    };

    // Functor forwarding its arguments to container_cat; used with unpack().
    struct lambda_merge_arrays
    {
        template <typename... Xs>
        __host__ __device__ constexpr auto operator()(Xs... xs) const
        {
            return container_cat(xs...);
        }
    };

    // Store the lower descriptor and the transforms, and statically check
    // that the dimension-id tuples are consistent with them.
    __host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor_v1(
        const LowerDesc& low_tensor_desc, const Transforms& transforms)
        : low_tensor_desc_{low_tensor_desc}, transforms_{transforms}
    {
        static_assert(NTransform == Transforms::Size() && NTransform == LowDimensionIds::Size() &&
                          NTransform == UpDimensionIds::Size(),
                      "wrong! # of transformations not the same");

        // sanity check:
        //   LowDimensionIds should include all low-dimensions,
        //   UpDimensionIds should include all up-dimensions
        using unsorted_up_dimension_ids =
            decltype(unpack(lambda_merge_sequences{}, UpDimensionIds{}));

        using sorted_up_dimension_ids =
            typename sequence_sort<unsorted_up_dimension_ids, math::less<index_t>>::type;

        static_assert(sorted_up_dimension_ids::Size() == NDimUp &&
                          is_valid_sequence_map<sorted_up_dimension_ids>{},
                      "wrong! UpDimensionIds is not configured correctly");

        using unsorted_low_dimension_ids =
            decltype(unpack(lambda_merge_sequences{}, LowDimensionIds{}));

        using sorted_low_dimension_ids =
            typename sequence_sort<unsorted_low_dimension_ids, math::less<index_t>>::type;

        static_assert(sorted_low_dimension_ids::Size() == NDimLow &&
                          is_valid_sequence_map<sorted_low_dimension_ids>{},
                      "wrong! LowDimensionIds is not configured correctly");

        // TODO: sanity check: while an up-dimension could be associated with
        //   multiple transformations, a low-dimension should be associated with
        //   only one transformation

        // TODO: sanity-check: GetLowerLengths of each transform should be
        //   consistent with lengths of lower-tensor-descriptor
    }

    // Value-initialize both the lower descriptor and the transforms.
    __host__ __device__ explicit constexpr DynamicTransformedTensorDescriptor_v1()
        : low_tensor_desc_{}, transforms_{}
    {
    }

    // Number of dimensions seen by users of this descriptor (upper dimensions).
    __host__ __device__ static constexpr index_t GetNumOfDimension()
    {
        return GetNumOfUpperDimension();
    }

    // Upper lengths of all transforms, concatenated and then reordered so that
    // they follow ascending upper-dimension id.
    __host__ __device__ constexpr auto GetUpperLengths() const
    {
        // sort upper-dimension-ids
        constexpr auto unsorted_up_dimension_ids =
            unpack(lambda_merge_sequences{}, UpDimensionIds{});

        using sort_up_dimension_ids = sequence_unique_sort<decltype(unsorted_up_dimension_ids),
                                                           math::less<index_t>,
                                                           math::equal<index_t>>;

        constexpr auto sorted2unsorted_map = typename sort_up_dimension_ids::sorted2unsorted_map{};

        // sort upper-lengths
        const auto tuple_of_up_lengths =
            transform_tuples([](const auto& tran) constexpr { return tran.GetUpperLengths(); },
                             transforms_);

        const auto unsorted_up_lengths = unpack(lambda_merge_arrays{}, tuple_of_up_lengths);

        const auto sorted_up_lengths =
            container_reorder_given_new2old(unsorted_up_lengths, sorted2unsorted_map);

        return sorted_up_lengths;
    }

    // Lengths of this descriptor; same as GetUpperLengths().
    __host__ __device__ constexpr auto GetLengths() const { return GetUpperLengths(); }

    // Length of upper dimension IDim, selected with a compile-time index.
    template <index_t IDim>
    __host__ __device__ constexpr index_t GetLength(Number<IDim>) const
    {
        return GetLengths()[Number<IDim>{}];
    }

    // Product of all upper lengths.
    __host__ __device__ constexpr index_t GetElementSize() const
    {
        return container_reduce(GetLengths(), math::multiplies<index_t>{}, index_t{1});
    }

    // Element space is delegated to the lower descriptor.
    __host__ __device__ constexpr index_t GetElementSpace() const
    {
        return low_tensor_desc_.GetElementSpace();
    }

    // Copy of the lower descriptor.
    __host__ __device__ constexpr auto GetLowerTensorDescriptor() const
    {
        return low_tensor_desc_;
    }

    // Fill idx_low from idx_up, one transform at a time. Each transform reads
    // the entries of idx_up selected by its UpDimensionIds and writes the
    // entries of idx_low selected by its LowDimensionIds.
    template <typename LowIdx, typename UpIdx>
    __host__ __device__ void CalculateLowerIndex(LowIdx& idx_low, const UpIdx& idx_up) const
    {
        static_for<0, NTransform, 1>{}([&](auto itran) constexpr {
            const auto tran = transforms_.At(itran);

            const auto idx_up_part = pick_container_element(idx_up, UpDimensionIds{}.At(itran));

            // NOTE(review): presumably pick_container_element returns a view that
            // writes through to idx_low; otherwise the result would be discarded
            // -- confirm against its definition
            auto idx_low_part = pick_container_element(idx_low, LowDimensionIds{}.At(itran));

            tran.CalculateLowerIndex(idx_low_part, idx_up_part);
        });
    }

    // Fill idx_low_diff (the lower-index step) from an upper-index step and
    // the old lower/upper indices, one transform at a time.
    template <typename LowIdxDiff, typename UpIdxDiff, typename LowIdx, typename UpIdx>
    __host__ __device__ void CalculateLowerIndexDiff(LowIdxDiff& idx_low_diff,
                                                     const UpIdxDiff& idx_up_diff,
                                                     const LowIdx& idx_low_old,
                                                     const UpIdx& idx_up_old) const
    {
        static_for<0, NTransform, 1>{}([&](auto itran) {
            const auto tran = transforms_.At(itran);

            const auto idx_up_diff_part =
                pick_container_element(idx_up_diff, UpDimensionIds{}.At(itran));

            const auto idx_up_old_part =
                pick_container_element(idx_up_old, UpDimensionIds{}.At(itran));

            const auto idx_low_old_part =
                pick_container_element(idx_low_old, LowDimensionIds{}.At(itran));

            // NOTE(review): presumably a write-through view into idx_low_diff
            // -- confirm against pick_container_element's definition
            auto idx_low_diff_part =
                pick_container_element(idx_low_diff, LowDimensionIds{}.At(itran));

            tran.CalculateLowerIndexDiff(
                idx_low_diff_part, idx_up_diff_part, idx_low_old_part, idx_up_old_part);
        });
    }

    // Convenience overload: returns the lower index for idx_up by value.
    template <typename UpIdx>
    __host__ __device__ constexpr auto CalculateLowerIndex(const UpIdx& idx_up) const
    {
        LowerIndex idx_low;

        CalculateLowerIndex(idx_low, idx_up);

        return idx_low;
    }

    // Convenience overload: returns the lower-index step by value.
    template <typename UpIdxDiff, typename LowIdx, typename UpIdx>
    __host__ __device__ constexpr auto CalculateLowerIndexDiff(const UpIdxDiff& idx_up_diff,
                                                               const LowIdx& idx_low_old,
                                                               const UpIdx& idx_up_old) const
    {
        LowerIndex idx_low_diff;

        CalculateLowerIndexDiff(idx_low_diff, idx_up_diff, idx_low_old, idx_up_old);

        return idx_low_diff;
    }

    // Offset of idx_up: transform it to a lower index and let the lower
    // descriptor compute the offset.
    __host__ __device__ constexpr index_t CalculateOffset(const UpperIndex& idx_up) const
    {
        return low_tensor_desc_.CalculateOffset(CalculateLowerIndex(idx_up));
    }

    // True iff 0 <= idx_up[d] < length[d] holds for every upper dimension d.
    __host__ __device__ constexpr bool IsUpperIndexValid(const UpperIndex& idx_up) const
    {
        bool flag = true;

        static_for<0, NDimUp, 1>{}(
            [&](auto i) { flag = flag && idx_up[i] >= 0 && idx_up[i] < GetLength(i); });

        return flag;
    }

    // True iff every transform that does not always map a valid upper index
    // to a valid lower index reports that idx_up maps to a valid lower index.
    __host__ __device__ constexpr bool
    IsValidUpperIndexMappedToValidLowerIndex(const UpperIndex& idx_up) const
    {
        bool flag = true;

        static_for<0, NTransform, 1>{}([&](auto itran) {
            // Query the transform stored in this descriptor. A value-initialized
            // Transforms{} would not carry the transform objects that were passed
            // to the constructor, so the runtime check below must not use one.
            const auto tran = transforms_.At(itran);

            // only check a transformation if it does not always have a valid mapping
            constexpr bool is_valid_up_always_mapped_to_valid_low =
                decltype(tran)::IsValidUpperIndexAlwaysMappedToValidLowerIndex();

            if constexpr(!is_valid_up_always_mapped_to_valid_low)
            {
                const auto up_dims_part = UpDimensionIds{}.At(itran);
                const auto idx_up_part  = pick_container_element(idx_up, up_dims_part);

                flag = flag && tran.IsValidUpperIndexMappedToValidLowerIndex(idx_up_part);
            }
        });

        return flag;
    }
};
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
deleted
100644 → 0
View file @
b6bfde53
This diff is collapsed.
Click to expand it.
driver/include/device_dummy_dynamic_transform_v1.hpp
View file @
506df423
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v1(InDesc,
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
(
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
_v1
(
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
(
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
_v1
(
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
(
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
_v1
(
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -52,7 +52,7 @@ void device_dummy_dynamic_transform_v1(InDesc,
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
const
auto
in_gemmk_gemmn_global_desc
=
tensor_descs
.
At
(
Number
<
0
>
{});
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
<
2
>
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
make_multi_index
(
0
,
0
));
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
{
{
...
@@ -112,9 +112,9 @@ void device_dummy_dynamic_transform_v1(InDesc,
...
@@ -112,9 +112,9 @@ void device_dummy_dynamic_transform_v1(InDesc,
index_t
*
const
,
index_t
*
const
,
float
*
const
,
float
*
const
,
float
*
const
,
float
*
const
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
_v1
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
_v1
<
4
>
,
const
DynamicNativeTensorDescriptor
<
4
>
,
const
DynamicNativeTensorDescriptor
_v1
<
4
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
const
MultiIndex
<
2
>
,
...
...
driver/include/device_dummy_dynamic_transform_v2.hpp
View file @
506df423
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -28,11 +28,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
using
TDevice
=
typename
conditional
<
is_same
<
half_float
::
half
,
T
>::
value
,
half_t
,
T
>::
type
;
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
_v2
<
4
>
(
const
auto
in_nchw_desc
=
make_dynamic_native_tensor_descriptor
<
4
>
(
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
to_multi_index
(
InDesc
::
GetLengths
()),
to_multi_index
(
InDesc
::
GetStrides
()));
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
_v2
<
4
>
(
const
auto
wei_kcyx_desc
=
make_dynamic_native_tensor_descriptor
<
4
>
(
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
to_multi_index
(
WeiDesc
::
GetLengths
()),
to_multi_index
(
WeiDesc
::
GetStrides
()));
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
_v2
<
4
>
(
const
auto
out_nkhw_desc
=
make_dynamic_native_tensor_descriptor
<
4
>
(
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
to_multi_index
(
OutDesc
::
GetLengths
()),
to_multi_index
(
OutDesc
::
GetStrides
()));
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
const
auto
conv_strides
=
to_multi_index
(
ConvStrides
{});
...
@@ -52,12 +52,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -52,12 +52,11 @@ void device_dummy_dynamic_transform_v2(InDesc,
// test on cpu
// test on cpu
{
{
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
=
make_dynamic_tensor_coordinate_step
(
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
1
));
make_multi_index
(
0
,
0
,
1
));
print_array_v2
(
"do_tansforms 0 0 1: "
,
print_array_v2
(
"do_tansforms 0 0 1: "
,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
.
do_transforms_
);
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
.
do_transforms_
);
...
@@ -70,19 +69,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -70,19 +69,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"
\n
"
);
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
);
in_gemmk_gemmn_gemmkpack_coord_step_0_0_1
);
}
}
}
}
{
{
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
=
make_dynamic_tensor_coordinate_step
(
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
1
,
0
));
make_multi_index
(
0
,
1
,
0
));
print_array_v2
(
"do_tansforms 0 1 0: "
,
print_array_v2
(
"do_tansforms 0 1 0: "
,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
.
do_transforms_
);
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
.
do_transforms_
);
...
@@ -95,19 +93,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -95,19 +93,18 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"
\n
"
);
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
);
in_gemmk_gemmn_gemmkpack_coord_step_0_1_0
);
}
}
}
}
{
{
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
_v2
(
auto
in_gemmk_gemmn_gemmkpack_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
0
,
0
,
0
));
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
const
auto
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
=
make_dynamic_tensor_coordinate_step
(
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_global_desc
,
make_multi_index
(
1
,
0
,
0
));
make_multi_index
(
1
,
0
,
0
));
print_array_v2
(
"do_tansforms 1 0 0: "
,
print_array_v2
(
"do_tansforms 1 0 0: "
,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
.
do_transforms_
);
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
.
do_transforms_
);
...
@@ -120,9 +117,9 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -120,9 +117,9 @@ void device_dummy_dynamic_transform_v2(InDesc,
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_gemmkpack_coord
.
GetOffset
());
printf
(
"
\n
"
);
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate
_v2
(
in_gemmk_gemmn_gemmkpack_global_desc
,
move_dynamic_tensor_coordinate
(
in_gemmk_gemmn_gemmkpack_global_desc
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord
,
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
);
in_gemmk_gemmn_gemmkpack_coord_step_1_0_0
);
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment