Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
2ffa2708
Commit
2ffa2708
authored
Sep 06, 2020
by
Chao Liu
Browse files
adding dynamic tensor descriptor
parent
c1eaba27
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
555 additions
and
56 deletions
+555
-56
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp
...rnel/include/kernel_algorithm/dummy_dynamic_transform.hpp
+5
-1
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
...lude/tensor_description/dynamic_multi_index_transform.hpp
+72
-54
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v2.hpp
+50
-0
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+421
-0
composable_kernel/include/tensor_description/multi_index_transform.hpp
...rnel/include/tensor_description/multi_index_transform.hpp
+6
-0
driver/include/device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
.../device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
+1
-1
No files found.
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp
View file @
2ffa2708
...
...
@@ -553,7 +553,7 @@ struct DummyDynamicTransform
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
idx
);
for
(
index_t
iter
=
0
;
iter
<
100
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
niter
;
++
iter
)
{
constexpr
auto
gemmk1_gemmn0
=
MultiIndex
<
2
>
{
1
,
0
};
...
...
@@ -574,7 +574,11 @@ struct DummyDynamicTransform
1
,
p_out_global
,
in_gemmk_gemmn_coord
.
GetOffset
(),
#if 0
in_gemmk_gemmn_coord.IsOffsetValidAssumingUpperIndexIsValid(),
#else
true
,
#endif
in_gemmk_gemmn_global_desc
.
GetElementSpace
());
}
}
...
...
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
View file @
2ffa2708
...
...
@@ -35,8 +35,8 @@ struct DynamicPassThrough
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
/* idx_low_old */
,
const
UpIdx
&
/* idx_up_old */
)
{
...
...
@@ -44,7 +44,7 @@ struct DynamicPassThrough
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
idx_
low_
diff
(
0
)
=
idx_
up_
diff
[
0
];
idx_diff
_low
(
0
)
=
idx_diff
_up
[
0
];
}
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
...
...
@@ -86,7 +86,8 @@ struct DynamicLeftPad
__host__
__device__
constexpr
auto
GetUpperLengths
()
const
{
return
UpperIndex
{
up_length_
};
}
template
<
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
__host__
__device__
constexpr
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
{
static_assert
(
LowIdx
::
Size
()
==
1
&&
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
...
...
@@ -95,8 +96,8 @@ struct DynamicLeftPad
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
/* idx_low_old */
,
const
UpIdx
&
/* idx_up_old */
)
{
...
...
@@ -104,7 +105,7 @@ struct DynamicLeftPad
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
idx_
low_
diff
(
0
)
=
idx_
up_
diff
[
0
];
idx_diff
_low
(
0
)
=
idx_diff
_up
[
0
];
}
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
...
...
@@ -159,8 +160,8 @@ struct DynamicRightPad
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
/* idx_low_old */
,
const
UpIdx
&
/* idx_up_old */
)
{
...
...
@@ -168,7 +169,7 @@ struct DynamicRightPad
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
idx_
low_
diff
(
0
)
=
idx_
up_
diff
[
0
];
idx_diff
_low
(
0
)
=
idx_diff
_up
[
0
];
}
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
...
...
@@ -218,7 +219,8 @@ struct DynamicEmbed
__host__
__device__
constexpr
auto
GetUpperLengths
()
const
{
return
up_lengths_
;
}
template
<
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
__host__
__device__
constexpr
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
{
static_assert
(
LowIdx
::
Size
()
==
1
&&
UpIdx
::
Size
()
==
NDimUp
,
"wrong! inconsistent # of dimension"
);
...
...
@@ -233,8 +235,8 @@ struct DynamicEmbed
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
constexpr
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
/* idx_low_old */
,
const
UpIdx
&
/* idx_up_old */
)
const
{
...
...
@@ -242,12 +244,12 @@ struct DynamicEmbed
LowIdx
::
Size
()
==
1
&&
UpIdx
::
Size
()
==
NDimUp
,
"wrong! inconsistent # of dimension"
);
idx_
low_
diff
(
0
)
=
0
;
idx_diff
_low
(
0
)
=
0
;
#pragma unroll
for
(
index_t
i
=
0
;
i
<
NDimUp
;
++
i
)
{
idx_
low_
diff
(
0
)
+=
idx_
up_
diff
[
i
]
*
coefficients_
[
i
];
idx_diff
_low
(
0
)
+=
idx_diff
_up
[
i
]
*
coefficients_
[
i
];
}
}
...
...
@@ -299,7 +301,8 @@ struct DynamicMerge
__host__
__device__
constexpr
auto
GetUpperLengths
()
const
{
return
UpperIndex
{
up_length_
};
}
template
<
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
__host__
__device__
constexpr
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
{
static_assert
(
LowIdx
::
Size
()
==
NDimLow
&&
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
...
...
@@ -316,13 +319,13 @@ struct DynamicMerge
idx_low
(
NDimLow
-
1
)
=
tmp
;
}
// idx_
low_
diff depends on idx_low_old, so idx_low need to be up-to-date
// If idx_
up_
diff is known at compile-time, many calculations can be optimized
// idx_diff
_low
depends on idx_low_old, so idx_low need to be up-to-date
// If idx_diff
_up
is known at compile-time, many calculations can be optimized
// away by compiler
// This function assume idx_low_old is not out-of-bound
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
constexpr
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
idx_low_old
,
const
UpIdx
&
/* idx_up_old */
)
const
{
...
...
@@ -330,22 +333,35 @@ struct DynamicMerge
LowIdx
::
Size
()
==
NDimLow
&&
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
// CalculateLowerIndex(idx_low_diff_const) has multiple integer divisions.
#if 1
// I only want to do this check, if idx_diff_up is know at compile-time
if
(
idx_diff_up
[
0
]
==
0
)
{
#pragma unroll
for
(
index_t
i
=
0
;
i
<
NDimLow
;
++
i
)
{
idx_diff_low
(
i
)
=
0
;
}
return
;
}
#endif
// CalculateLowerIndex(idx_diff_low_const) has multiple integer divisions.
// However,
// 1) If idx_
up_
diff is known at compile-time, then idx_
low_
diff_const
// 1) If idx_diff
_up
is known at compile-time, then idx_diff
_low
_const
// can be calculated at compile-time.
// 2) If idx_
up_
diff is not known at compile-time, but its value
// 2) If idx_diff
_up
is not known at compile-time, but its value
// doesn't change during the whole kernel execution, then
// idx_
low_
diff_const also
// idx_diff
_low
_const also
// doesn't change during the whole kernel execution. Compiler generated
// ISA should
// only caclculate idx_
low_
diff_const once and save it durinng the whole
// only caclculate idx_diff
_low
_const once and save it durinng the whole
// kernel execution
// If neither 1) nor 2) is satisfied, then the calculation will also be
// computed at
// run-time each time this function is called, and can be very expensive.
LowerIndex
idx_
low_
diff_const
;
CalculateLowerIndex
(
idx_
low_
diff_const
,
idx_
up_
diff
);
LowerIndex
idx_diff
_low
_const
;
CalculateLowerIndex
(
idx_diff_
low_
const
,
idx_diff
_up
);
// do carry check on each low dimension in reversed order
// do not need to check the first dimension
...
...
@@ -354,29 +370,29 @@ struct DynamicMerge
#pragma unroll
for
(
index_t
i
=
NDimLow
-
1
;
i
>
0
;
--
i
)
{
// this should be saved as well
index_t
idx_low_length_minus_idx_
low_
diff_const
=
low_lengths_
[
i
]
-
idx_
low_
diff_const
[
i
];
// this should be saved
in SGPR
as well
index_t
idx_low_length_minus_idx_diff
_low
_const
=
low_lengths_
[
i
]
-
idx_diff
_low
_const
[
i
];
#if 0
index_t idx_low_length_plus_idx_
low_
diff_const =
low_lengths_[i] + idx_
low_
diff_const[i];
index_t idx_low_length_plus_idx_diff
_low
_const =
low_lengths_[i] + idx_diff
_low
_const[i];
#endif
index_t
idx_low_tmp
=
idx_low_old
[
i
]
+
carry
;
bool
do_carry
=
idx_low_tmp
>=
idx_low_length_minus_idx_
low_
diff_const
;
bool
do_carry
=
idx_low_tmp
>=
idx_low_length_minus_idx_diff
_low
_const
;
#if 0
bool do_borrow = idx_low_tmp < -idx_
low_
diff_const[i];
bool do_borrow = idx_low_tmp < -idx_diff
_low
_const[i];
#endif
idx_
low_
diff
(
i
)
=
do_carry
?
-
idx_low_length_minus_idx_
low_
diff_const
:
idx_
low_
diff_const
[
i
];
idx_diff
_low
(
i
)
=
do_carry
?
-
idx_low_length_minus_idx_diff
_low
_const
:
idx_diff
_low
_const
[
i
];
#if 0
idx_
low_
diff(i) =
do_borrow ? idx_low_length_plus_idx_
low_
diff_const : idx_
low_
diff[i];
idx_diff
_low
(i) =
do_borrow ? idx_low_length_plus_idx_diff
_low
_const : idx_diff
_low
[i];
#endif
idx_
low_
diff
(
i
)
+=
carry
;
idx_diff
_low
(
i
)
+=
carry
;
carry
=
do_carry
?
1
:
0
;
#if 0
...
...
@@ -384,7 +400,7 @@ struct DynamicMerge
#endif
}
idx_
low_
diff
(
0
)
=
idx_
low_
diff_const
[
0
]
+
carry
;
idx_diff
_low
(
0
)
=
idx_diff
_low
_const
[
0
]
+
carry
;
}
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
false
;
}
...
...
@@ -431,7 +447,8 @@ struct DynamicUnMerge
__host__
__device__
constexpr
auto
GetUpperLengths
()
const
{
return
up_lengths_
;
}
template
<
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
__host__
__device__
constexpr
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
{
idx_low
(
0
)
=
idx_up
[
NDimUp
];
...
...
@@ -443,12 +460,12 @@ struct DynamicUnMerge
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
constexpr
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
/* idx_low_old */
,
const
UpIdx
&
/* idx_up_old */
)
const
{
CalculateLowerIndex
(
idx_
low_
diff
,
idx_
up_
diff
);
CalculateLowerIndex
(
idx_diff
_low
,
idx_diff
_up
);
}
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
...
...
@@ -486,7 +503,8 @@ struct DynamicFreeze
__host__
__device__
constexpr
auto
GetUpperLengths
()
const
{
return
UpperIndex
{};
}
template
<
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
__host__
__device__
constexpr
void
CalculateLowerIndex
(
LowIdx
&
idx_low
,
const
UpIdx
&
idx_up
)
const
{
static_assert
(
LowIdx
::
Size
()
==
1
&&
UpIdx
::
Size
()
==
1
,
"wrong! inconsistent # of dimension"
);
...
...
@@ -495,12 +513,12 @@ struct DynamicFreeze
}
template
<
typename
LowIdxDiff
,
typename
UpIdxDiff
,
typename
LowIdx
,
typename
UpIdx
>
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_
low_
diff
,
const
UpIdxDiff
&
idx_
up_
diff
,
__host__
__device__
static
void
CalculateLowerIndexDiff
(
LowIdxDiff
&
idx_diff
_low
,
const
UpIdxDiff
&
idx_diff
_up
,
const
LowIdx
&
/* idx_low_old */
,
const
UpIdx
&
/* idx_up_old */
)
{
idx_
low_
diff
(
0
)
=
index_t
{
0
};
idx_diff
_low
(
0
)
=
index_t
{
0
};
}
__host__
__device__
static
constexpr
bool
IsLinearTransform
()
{
return
true
;
}
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
0 → 100644
View file @
2ffa2708
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_HELPER_V2_HPP
#include "common_header.hpp"
#include "dynamic_tensor_descriptor_v2.hpp"
namespace
ck
{
template
<
typename
LowerTensorDescriptor
,
typename
Transforms
,
typename
LowerVisibleDimensionLowerVisibleIdss
,
typename
UpperVisibleDimensionUpperVisibleIdss
>
__host__
__device__
constexpr
auto
transform_dynamic_tensor_descriptor_v2
(
const
LowerTensorDescriptor
&
low_tensor_desc
,
const
Transforms
&
transforms
,
LowerVisibleDimensionLowerVisibleIdss
,
UpperVisibleDimensionUpperVisibleIdss
)
{
// convert lower visible dimension idss (tuple of sequences) to hidden dimension idss (tuple of sequences)
constexpr
auto
low_visible_dimension_hidden_idss
=
transform_tuples
(
// convert lower visible dimension ids (a sequence) to hidden dimension ids (a sequence)
[](
auto
low_visible_dim_ids
)
{
return
transform_sequences
(
// convert lower visible dimension id to hidden dimension id
[](
auto
low_visible_dim_id
)
{
return
low_tensor_desc
.
GetVisibleDimensionIds
()[
low_visible_dim_id
];
},
low_visible_dim_ids
);
},
LowerVisibleDimensionLowerVisibleIdss
{});
constexpr
auto
up_visible_dims_
const
auto
all_transforms
=
merge_tuples
(
old_tensor_desc
.
GetTransforms
(),
new_transforms
);
constexpr
auto
all_low_dim_idss
=
merge_tuples
(
old_tensor_desc
.
GetLowerDimensionIdss
(),
new_low_dim_idss
);
constexpr
auto
all_up_dim_idss
=
merge_tuples
(
old_tensor_desc
.
GetUpperDimensionIdss
(),
new_up_dim_idss
);
constexpr
auto
new_visible_dim_ids
=
new_up_dim_idss
return
DynamicTensorDescriptor_v2
<
decltype
(
all_transforms
),
decltype
(
all_low_dim_idss
),
decltype
(
all_up_dim_idss
),
decltype
(
new_visible_dim_ids
)
>
{
all_transforms
,
old_tensor_desc
.
GetElementSpaceSize
()};
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
0 → 100644
View file @
2ffa2708
#ifndef CK_DYNAMIC_TENSOR_DESCRIPTOR_V2_HPP
#define CK_DYNAMIC_TENSOR_DESCRIPTOR_V2_HPP
#include "common_header.hpp"
#include "dynamic_multi_index_transform.hpp"
namespace
ck
{
template
<
index_t
NDimHidden
,
typename
VisibleDimensionIds
>
struct
DynamicTensorCoordinate_v2
;
template
<
index_t
NTransform
,
index_t
NDimVisible
>
struct
DynamicTensorCoordinateStep_v2
;
// Transforms: Tuple<transforms...>
// LowerDimensionIdss : Tuple<Sequence<...>, ...>
// UpperDimensionIdss : Tuple<Sequence<...>, ...>
// VisibleDimensionIds> : Sequence<...>
template
<
typename
Transforms
,
typename
LowerDimensionIdss
,
typename
UpperDimensionIdss
,
typename
VisibleDimensionIds
>
struct
DynamicTensorDescriptor_v2
{
constexpr
static
index_t
ntransform_
=
GetNumOfTransform
();
constexpr
static
index_t
ndim_visible_
=
GetNumOfVisibleDimension
();
constexpr
static
index_t
ndim_hidden_
=
GetNumOfHiddenDimension
();
using
VisibleIndex
=
MultiIndex
<
ndim_visible_
>
;
using
HiddenIndex
=
MultiIndex
<
ndim_hidden_
>
;
__host__
__device__
explicit
constexpr
DynamicTensorDescriptor_v2
(
const
Transforms
&
transforms
,
index_t
element_space_size
)
:
transforms_
{
transforms
},
hidden_lengths_
{
InitializeHiddenLengths
(
transforms_
,
element_space_size
)},
visble_lengths_
{
hidden_lengths_
}
{
static_assert
(
Transforms
::
Size
()
==
ntransforms_
&&
LowerDimensionIdss
::
Size
()
==
ntransforms_
&&
UpperDimensionIdss
::
Size
()
==
ntransforms_
,
"wrong! inconsistent # of transformations"
);
// TODO check dependency of dimensions is valid
}
__host__
__device__
static
constexpr
index_t
GetNumOfDimension
()
const
{
return
GetNumOfVisibleDimension
();
}
__host__
__device__
constexpr
index_t
GetLength
(
index_t
idim
)
const
{
return
visible_lengths_
[
idim
];
}
__host__
__device__
constexpr
const
auto
&
GetLengths
()
const
{
return
visible_lengths_
;
}
// maybe this result should be saved as a member variable
__host__
__device__
constexpr
index_t
GetElementSize
()
const
{
return
reduce_on_array
(
GetLengths
(),
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
}
__host__
__device__
constexpr
index_t
GetElementSpaceSize
()
const
{
return
hidden_lengths_
[
0
];
}
template
<
typename
Idx
>
__host__
__device__
constexpr
index_t
CalculateOffset
(
const
Idx
&
idx
)
const
{
static_assert
(
Idx
::
Size
()
==
GetNumOfDimension
(),
"wrong! inconsistent # of dimension"
);
return
make_tensor_coordinate_v2
(
*
this
,
idx
).
GetOffset
();
}
private:
__host__
__device__
static
constexpr
index_t
GetNumOfVisibleDimension
()
{
return
VisibleDimensionIds
::
Size
();
}
__host__
__device__
static
constexpr
index_t
GetNumOfHiddenDimension
()
{
constexpr
auto
all_low_dim_ids
=
unpack
([](
auto
&&
...
xs
)
constexpr
{
return
merge_sequences
(
xs
...);
},
LowerDimsionIdss
{});
constexpr
auto
all_up_dim_ids
=
unpack
([](
auto
&&
...
xs
)
constexpr
{
return
merge_sequences
(
xs
...);
},
UpperDimsionIdss
{});
constexpr
auto
all_dim_ids
=
merge_sequenses
(
all_low_dim_ids
,
all_up_dim_ids
);
using
unique_sort_all_dim_ids
=
sequence_unique_sort
<
decltype
(
all_dim_ids
),
math
::
less
<
index_t
>
,
math
::
equal
<
index_t
>>::
type
;
return
uniqie_sort_all_dim_ids
::
type
::
Size
();
}
__host__
__device__
static
constexpr
index_t
GetNumOfTransform
()
{
return
Transforms
::
Size
();
}
__host__
__device__
constexpr
const
auto
&
GetTransforms
()
const
{
return
transforms_
;
}
__host__
__device__
static
constexpr
auto
GetLowerDimensionIdss
()
{
return
LowerDimensionIdss
{};
}
__host__
__device__
static
constexpr
auto
GetUpperDimensionIdss
()
{
return
UpperDimensionIdss
{};
}
__host__
__device__
static
constexpr
index_t
GetVisibleDimensionIds
()
{
return
VisibleDimensionIds
{};
}
__host__
__device__
static
constexpr
auto
InitializeHiddenLengths
(
const
Transforms
&
transforms
,
index_t
element_space_size
)
{
HiddenIndex
lengths_hidden
=
make_zero_multi_index
<
ndim_hidden_
>
();
// this is the orignal tensor element space size
lengths_hidden
(
0
)
=
element_space_size
;
// lengths for all other hidden dimensions
static_for
<
0
,
ntransform_
,
1
>
{}([
&
](
auto
itran
)
{
const
auto
&
tran
=
transforms
.
At
(
itran
);
constexpr
auto
up_dim_ids
=
UpperDimensionIdss
::
At
(
itran
);
const
auto
lengths_up_pick
=
pick_array_element
(
lengths_hidden
,
up_dim_ids
);
#pragma unroll
for
(
index_t
i
=
0
;
i
<
lengths_low
.
Size
();
++
i
)
{
lengths_low_pick
(
i
)
=
tran
.
GetUpperLengths
()[
i
];
}
});
return
lengths_hidden
;
}
// private member variables
const
Transforms
transforms_
;
// TODO maybe hidden_lengths_ should use reference_wrapper to save space on stack?
const
HiddenIndex
hidden_lengths_
;
// visible_lenths_ contains a reference to hidden_lengths_
const
ArrayElementPicker
<
HiddenIndex
,
VisibleDimensionIds
>
visible_lengths_
;
// friend functions for making and updating tensor coordinate
__host__
__device__
friend
constexpr
DynamicTensorCoordinate_v2
<
ndim_hidden_
,
VisibleDimensionIds
>
make_tensor_coordinate_v2
(
const
DynamicTensorDescriptor_v2
&
/* tensor_desc */
,
const
VisibleIndex
&
/* idx_visible */
);
__host__
__device__
friend
constexpr
DynamicTensorCoordinateStep_v2
<
ntransform_
,
ndim_visible_
>
make_tensor_coordinate_step_v2
(
const
DynamicTensorDescriptor_v2
&
/* tensor_desc */
,
const
VisibleIndex
&
/* idx_diff_visible */
);
__host__
__device__
friend
void
move_tensor_coordinate_v2
(
const
DynamicTensorDescriptor_v2
&
/* tensor_desc */
,
DynamicTensorCoordinate_v2
<
ndim_hidden_
,
VisibleDimensionIds
>&
/* coord */
,
const
DynamicTensorCoordinateStep_v2
<
ntransform_
,
ndim_visible_
>&
/* coord_step */
);
};
template
<
index_t
NDimHidden
,
typename
VisibleDimensionIds
>
struct
DynamicTensorCoordinate_v2
{
constexpr
index_t
ndim_visible_
=
VisbleDimension
::
Size
();
using
HiddenIndex
=
MultiIndex
<
NDimHidden
>
;
using
VisibleIndex
=
MultiIndex
<
ndim_visible_
>
;
__host__
__device__
explicit
constexpr
DynamicTensorCoordinate_v2
(
const
HiddenIndex
&
idx_hidden
)
:
idx_hidden_
{
idx_hidden
},
idx_visible_
{
idx_hidden_
}
{
}
__host__
__device__
constexpr
const
auto
&
GetIndex
()
const
{
GetVisibleIndex
();
}
__host__
__device__
constexpr
index_t
GetOffset
()
const
{
return
idx_hidden_
[
0
];
}
private:
__host__
__device__
constexpr
const
auto
&
GetHiddenIndex
()
const
{
return
idx_hidden_
;
}
__host__
__device__
auto
&
GetHiddenIndex
()
{
return
idx_hidden_
;
}
__host__
__device__
constexpr
const
auto
&
GetVisibleIndex
()
const
{
return
idx_visible_
;
}
__host__
__device__
auto
&
GetVisibleIndex
()
{
return
idx_visible_
;
}
// private member variables
HiddenIndex
idx_hidden_
;
// idx_visible_ contains a reference to idx_hidden_
ArrayElementPicker
<
HiddenIndex
,
VisibleDimensionIds
>
idx_visible_
;
// friend functions for making and updating tensor coordinate
template
<
typename
TensorDesc
>
__host__
__device__
friend
constexpr
DynamicTensorCoordinate_v2
make_tensor_coordinate_v2
(
const
TensorDesc
&
/* tensor_desc */
,
const
VisibleIndex
&
/* idx_visible */
);
template
<
typename
TensorDesc
>
__host__
__device__
friend
void
move_tensor_coordinate_v2
(
const
TensorDesc
&
/* tensor_desc */
,
DynamicTensorCoordinate_v2
&
/* coord */
,
const
DynamicTensorCoordinateStep_v2
<
TensorDesc
::
GetNumOfTransform
(),
ndim_visible_
>&
/* coord_step */
);
};
template
<
index_t
NTransform
,
index_t
NDimVisible
>
struct
DynamicTensorCoordinateStep_v2
{
using
VisibleIndex
=
MultiIndex
<
NDimVisible
>
;
__host__
__device__
explicit
constexpr
DynamicTensorCoordinateStep_v2
(
const
VisibleIndex
&
idx_diff_visible
,
const
Array
<
bool
,
NTransform
>&
do_transforms
)
:
idx_diff_visible_
{
idx_diff_visible
},
do_transforms_
{
do_transforms
}
{
}
private:
const
VisibleIndex
idx_diff_visible_
;
const
Array
<
bool
,
NTransform
>
do_transforms_
;
// friend functions for updating tensor coordinate
template
<
typename
TensorDesc
>
__host__
__device__
friend
constexpr
DynamicTensorCoordinateStep_v2
make_tensor_coordinate_step_v2
(
const
TensorDesc
&
/* tensor_desc */
,
const
VisibleIndex
&
/* idx_visible */
);
template
<
typename
TensorDesc
,
index_t
NDimHidden
,
typename
VisibleDimensionIds
>
__host__
__device__
friend
void
move_tensor_coordinate_v2
(
const
TensorDesc
&
/* tensor_desc */
,
DynamicTensorCoordinate_v2
<
NDimHidden
,
VisibleDimensionIds
>&
/* coord */
,
const
DynamicTensorCoordinateStep_v2
&
/* coord_step */
);
};
template
<
typename
TensorDesc
,
typename
VisibleIndex
>
__host__
__device__
constexpr
auto
make_tensor_coordinate_v2
(
const
TensorDesc
&
tensor_desc
,
const
VisibleIndex
&
idx_visible
)
{
static_assert
(
tensor_desc
.
GetNumOfDimension
()
==
idx_visible
.
Size
(),
"wrong! # of dimension inconsistent"
);
constexpr
index_t
ntransform
=
tensor_desc
.
GetNumOfTransformation
();
constexpr
index_t
ndim_hidden
=
tensor_desc
.
GetNumOfHiddenDimension
();
constexpr
index_t
ndim_visible
=
tensor_desc
.
GetNumOfVisibleDimension
();
MultiIndex
<
ndim_hidden
>
idx_hidden
;
auto
idx_visible_pick
=
pick_array_element
(
idx_hidden
,
tensor_desc
.
GetVisibleDimensionIds
());
// initialize visible index
#pragma unroll
for
(
index_t
i
<
ndim_visible
;
i
<
ndim_visible
,
++
i
)
{
idx_visible_pick
(
i
)
=
idx_visible
[
i
];
}
// calculate hidden index
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
&
](
auto
itran
)
{
const
auto
&
tran
=
transforms_
.
At
(
itran
);
constexpr
auto
dims_low
=
LowerDimensionIdss
::
At
(
itran
);
constexpr
auto
dims_up
=
UpperDimensionIdss
::
At
(
itran
);
const
auto
idx_up
=
pick_array_element
(
idx_hidden_
,
dim_up
);
auto
idx_low
=
pick_array_element
(
idx_hidden_
,
dim_low
);
tran
.
CalculateLowerIndex
(
idx_up
,
idx_low
);
});
// better to use std::move?
return
DynamicTensorCoordinate_v2
{
idx_hidden
};
}
template
<
typename
TensorDesc
,
typename
VisibleIndex
>
__host__
__device__
constexpr
auto
make_tensor_coordinate_step_v2
(
const
TensorDesc
&
tensor_desc
,
const
VisibleIndex
&
idx_diff_visible
)
{
static_assert
(
tensor_desc
.
GetNumOfDimension
()
==
idx_visible
.
Size
(),
"wrong! # of dimension inconsistent"
);
constexpr
index_t
ntransform
=
tensor_desc
.
GetNumOfTransformation
();
constexpr
index_t
ndim_hidden
=
tensor_desc
.
GetNumOfHiddenDimension
();
constexpr
index_t
ndim_visible
=
tensor_desc
.
GetNumOfVisibleDimension
();
Array
<
bool
,
ntransform
>
do_transforms
=
{
false
};
Array
<
bool
,
ndim_hidden
>
non_zero_diff
=
{
false
};
auto
non_zero_diff_pick_visible
=
pick_array_element
(
non_zero_diff
,
tensor_desc
.
GetVisibleDimensionIds
());
#pragma unroll
for
(
index_t
i
<
ndim_visible
;
i
<
ndim_visible
,
++
i
)
{
non_zero_diff_pick_visible
(
i
)
=
(
idx_diff_visible
[
i
]
!=
0
);
}
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
&
](
auto
itran
)
{
const
auto
&
tran
=
tensor_desec
.
GetTransforms
().
At
(
itran
);
constexpr
auto
dims_low
=
tensor_desc
.
GetLowerDimensionIdss
().
At
(
itran
);
constexpr
auto
dims_up
=
tensor_Desc
.
GetUpperDimensionIdss
().
At
(
itran
);
const
auto
non_zero_diff_pick_up
=
pick_array_element
(
non_zero_diff
,
dims_up
);
auto
non_zero_diff_pick_low
=
pick_array_element
(
non_zero_diff
,
dims_low
);
// if any of upper index diff components is non-zero, then
// 1) Need to do this transform
// 2) all components of lower index diff will assume to be non-zero and need to be
// computed
const
bool
idx_diff_up_has_non_zero
=
reduce_on_array
(
non_zero_diff_pick_up
,
[](
auto
a
,
auto
b
)
{
return
a
or
b
;
},
false
);
do_transforms
(
itran
)
=
idx_diff_up_has_non_zero
;
#pragma unroll
for
(
index_t
i
=
0
;
i
<
dims_low
.
Size
();
++
i
)
{
non_zero_diff_pick_low
(
i
)
=
idx_diff_up_has_non_zero
;
}
});
return
do_transforms
;
}
template
<
typename
TensorDesc
,
typename
TensorCoord
,
typename
TensorCoordStep
>
__host__
__device__
void
move_tensor_coordinate_v2
(
const
TensorDesc
&
tensor_desc
,
TensorCoord
&
coord
,
const
TensorCoordStep
&
coord_step
)
{
constexpr
index_t
ndim_hidden
=
tensor_desc
.
GetNumOfHiddenDimension
();
constexpr
index_t
ndim_visible
=
tensor_desc
.
GetNumOfVisibleDimension
();
constexpr
index_t
ntransform
=
tensor_desc
.
GetNumOfTransform
();
// this is what needs to be calculated
auto
idx_diff_hidden
=
make_zero_multi_index
<
ndim_hidden
>
();
const
auto
idx_diff_visible_pick
=
pick_array_element
(
idx_diff_hidden
,
tensor_desc
.
GetVisibleDimensionIds
());
// initialize visible index diff
#pragma unroll
for
(
index_t
i
=
0
;
i
<
ndim_visible_
;
++
i
)
{
idx_diff_visible_pick
(
i
)
=
coord_step
.
GetVisibleIndexDiff
()[
i
];
}
// this is what needs to be updated
auto
&
idx_hidden
=
coord
.
GetHiddenIndex
();
// update hidden index
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
&
](
auto
itran
)
{
const
auto
&
tran
=
tensor_desc
.
GetTransformations
().
At
(
itran
);
constexpr
auto
dims_low
=
tensor_desc
.
GetLowerDimensionIdss
().
At
(
itran
);
constexpr
auto
dims_up
=
tensor_desc
.
GetUpperDimensionIdss
().
At
(
itran
);
// this const is for ArrayElementPicker, Array itself may not be const
const
auto
idx_up
=
pick_array_element
(
idx_hidden
,
dim_up
);
const
auto
idx_low
=
pick_array_element
(
idx_hidden
,
dim_low
);
const
auto
idx_diff_up
=
pick_array_element
(
idx_diff_hidden
,
dim_up
);
const
auto
idx_diff_low
=
pick_array_element
(
idx_diff_hidden
,
dim_low
);
tran
.
CalculateLowerIndexDiff
(
idx_diff_low
,
idx_diff_up
,
idx_low
,
idx_up
);
// update idx_low
idx_low
+=
idx_diff_low
;
});
}
template
<
typename
TensorDesc
,
typename
TensorCoord
>
__host__
__device__
bool
constexpr
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
const
TensorDesc
&
tensor_desc
,
const
TensorCoord
&
coord
)
{
bool
valid
=
true
;
constexpr
index_t
ntransform
=
tensor_desc
.
GetNumOfTransform
();
const
auto
&
idx_hidden
=
coord
.
GetHiddenIndex
();
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
&
](
auto
itran
)
{
const
auto
tran
=
tensor_desc
.
GetTransforms
().
At
(
itran
);
// check validity, only if current transformation does not always has a valid mapping
if
constexpr
(
!
decltype
(
tran
)
::
IsValidUpperIndexAlwaysMappedToValidLowerIndex
())
{
const
auto
idx_up
=
pick_array_element
(
idx_hidden
,
tensor_desc
.
GetUpperDimensionIdss
().
At
(
itran
));
valid
=
valid
&&
tran
.
IsValidUpperIndexMappedToValidLowerIndex
(
idx_up
);
}
});
return
valid
;
}
template
<
typename
TensorDesc
,
typename
TensorCoord
>
__host__
__device__
bool
constexpr
coordinate_has_valid_offset
(
const
TensorDesc
&
tensor_desc
,
const
TensorCoord
&
coord
)
{
// check visible index
const
auto
&
idx_visible
=
coord
.
GetVisibleIndex
();
bool
is_visible_index_valid
=
true
;
#pragma unroll
for
(
index_t
i
=
0
;
i
<
tensor_desc
.
GetNumOfDimension
();
++
i
)
{
is_visible_index_valid
=
is_visible_index_valid
&&
(
idx_visible
[
i
]
>=
0
&&
idx_visible
[
i
]
<
tensor_desc
.
GetLength
(
i
));
}
// check other hidden index
return
is_visible_index_valid
&&
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
tensor_desc
,
coord
);
}
}
// namespace ck
#endif
composable_kernel/include/tensor_description/multi_index_transform.hpp
View file @
2ffa2708
...
...
@@ -14,6 +14,12 @@ __host__ __device__ constexpr auto make_multi_index(Xs... xs)
return
MultiIndex
<
sizeof
...(
Xs
)
>
(
xs
...);
}
template
<
index_t
NSize
>
__host__
__device__
constexpr
auto
make_zero_multi_index
()
{
make_zero_array
<
index_t
,
NSize
>
();
}
template
<
index_t
Length
>
struct
PassThrough
{
...
...
driver/include/device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw.hpp
View file @
2ffa2708
...
...
@@ -133,7 +133,7 @@ void device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(InDesc,
constexpr
index_t
WeiBlockCopySrcDataPerRead_E
=
2
;
constexpr
index_t
WeiBlockCopyDstDataPerWrite_K
=
1
;
#elif
0
#elif
1
// cdata = 64, BlockSize = 256, 128x128x8
constexpr
index_t
BlockSize
=
256
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment