Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
5730b906
Commit
5730b906
authored
Mar 31, 2021
by
Chao Liu
Browse files
clean up
parent
2fc3888d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
37 deletions
+8
-37
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
...lude/tensor_description/dynamic_multi_index_transform.hpp
+8
-37
No files found.
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp
View file @
5730b906
...
@@ -607,27 +607,12 @@ struct DynamicMerge
...
@@ -607,27 +607,12 @@ struct DynamicMerge
#if !CK_HACK_DYNAMIC_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE
#if !CK_HACK_DYNAMIC_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE
index_t
tmp
=
idx_diff_up
[
Number
<
0
>
{}];
index_t
tmp
=
idx_diff_up
[
Number
<
0
>
{}];
#if 1
// normal division
static_for
<
0
,
NDimLow
-
1
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
NDimLow
-
1
,
1
>
{}([
&
](
auto
i
)
{
idx_diff_low_const
(
i
)
=
tmp
/
low_lengths_scan_
[
i
];
idx_diff_low_const
(
i
)
=
tmp
/
low_lengths_scan_
[
i
];
tmp
-=
idx_diff_low_const
[
i
]
*
low_lengths_scan_
[
i
];
tmp
-=
idx_diff_low_const
[
i
]
*
low_lengths_scan_
[
i
];
});
});
idx_diff_low_const
(
Number
<
NDimLow
-
1
>
{})
=
tmp
;
idx_diff_low_const
(
Number
<
NDimLow
-
1
>
{})
=
tmp
;
#else
// magic division
static_for
<
NDimLow
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
index_t
tmp2
=
magic_division
::
DoMagicDivision
(
tmp
,
this
->
low_lengths_magic_divisor_multiplier_
[
i
],
this
->
low_lengths_magic_divisor_shift_
[
i
]);
idx_diff_low_const
(
i
)
=
tmp
-
tmp2
*
this
->
low_lengths_
[
i
];
tmp
=
tmp2
;
});
idx_diff_low_const
(
Number
<
0
>
{})
=
tmp
;
#endif
static_for
<
0
,
NDimLow
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
NDimLow
,
1
>
{}([
&
](
auto
i
)
{
idx_low_length_minus_idx_diff_low_const
(
i
)
=
low_lengths_
[
i
]
-
idx_diff_low_const
[
i
];
idx_low_length_minus_idx_diff_low_const
(
i
)
=
low_lengths_
[
i
]
-
idx_diff_low_const
[
i
];
...
@@ -638,25 +623,10 @@ struct DynamicMerge
...
@@ -638,25 +623,10 @@ struct DynamicMerge
// Hack: this force result into SGPR. Need to make sure the result is thread invariant
// Hack: this force result into SGPR. Need to make sure the result is thread invariant
index_t
tmp
=
idx_diff_up
[
Number
<
0
>
{}];
index_t
tmp
=
idx_diff_up
[
Number
<
0
>
{}];
#if 1
// normal division
static_for
<
0
,
NDimLow
-
1
,
1
>
{}([
&
](
auto
i
)
{
static_for
<
0
,
NDimLow
-
1
,
1
>
{}([
&
](
auto
i
)
{
idx_diff_low_const
(
i
)
=
__builtin_amdgcn_readfirstlane
(
tmp
/
low_lengths_scan_
[
i
]);
idx_diff_low_const
(
i
)
=
__builtin_amdgcn_readfirstlane
(
tmp
/
low_lengths_scan_
[
i
]);
tmp
-=
idx_diff_low_const
[
i
]
*
low_lengths_scan_
[
i
];
tmp
-=
idx_diff_low_const
[
i
]
*
low_lengths_scan_
[
i
];
});
});
#else
// magic division
static_for
<
NDimLow
-
1
,
0
,
-
1
>
{}([
&
](
auto
i
)
{
index_t
tmp2
=
magic_division
::
DoMagicDivision
(
tmp
,
this
->
low_lengths_magic_divisor_multiplier_
[
i
],
this
->
low_lengths_magic_divisor_shift_
[
i
]);
idx_diff_low_const
(
i
)
=
__builtin_amdgcn_readfirstlane
(
tmp
-
tmp2
*
this
->
low_lengths_
[
i
]);
tmp
=
tmp2
;
});
#endif
idx_diff_low_const
(
Number
<
NDimLow
-
1
>
{})
=
__builtin_amdgcn_readfirstlane
(
tmp
);
idx_diff_low_const
(
Number
<
NDimLow
-
1
>
{})
=
__builtin_amdgcn_readfirstlane
(
tmp
);
...
@@ -1072,7 +1042,7 @@ struct DynamicMerge
...
@@ -1072,7 +1042,7 @@ struct DynamicMerge
};
};
#else
#else
template
<
typename
LowLengths
>
template
<
typename
LowLengths
>
struct
lambda_generate_magic_division_calculate_magic_multiplier
struct
lambda_
merge_
generate_magic_division_calculate_magic_multiplier
{
{
template
<
index_t
I
>
template
<
index_t
I
>
__host__
__device__
constexpr
auto
operator
()(
Number
<
I
>
i
)
const
__host__
__device__
constexpr
auto
operator
()(
Number
<
I
>
i
)
const
...
@@ -1082,7 +1052,7 @@ struct lambda_generate_magic_division_calculate_magic_multiplier
...
@@ -1082,7 +1052,7 @@ struct lambda_generate_magic_division_calculate_magic_multiplier
};
};
template
<
typename
LowLengths
>
template
<
typename
LowLengths
>
struct
lambda_generate_magic_division_calculate_magic_shift
struct
lambda_
merge_
generate_magic_division_calculate_magic_shift
{
{
template
<
index_t
I
>
template
<
index_t
I
>
__host__
__device__
constexpr
auto
operator
()(
Number
<
I
>
i
)
const
__host__
__device__
constexpr
auto
operator
()(
Number
<
I
>
i
)
const
...
@@ -1102,12 +1072,13 @@ struct DynamicMerge
...
@@ -1102,12 +1072,13 @@ struct DynamicMerge
using
UpLengths
=
using
UpLengths
=
decltype
(
make_tuple
(
container_reduce
(
LowLengths
{},
math
::
multiplies_v2
{},
Number
<
1
>
{})));
decltype
(
make_tuple
(
container_reduce
(
LowLengths
{},
math
::
multiplies_v2
{},
Number
<
1
>
{})));
using
LowLengthsMagicDivisorMultipiler
=
decltype
(
using
LowLengthsMagicDivisorMultipiler
=
decltype
(
generate_tuple
(
generate_tuple
(
lambda_generate_magic_division_calculate_magic_multiplier
<
LowLengths
>
{},
lambda
_merge
_generate_magic_division_calculate_magic_multiplier
<
LowLengths
>
{},
Number
<
NDimLow
>
{}));
Number
<
NDimLow
>
{}));
using
LowLengthsMagicDivisorShift
=
decltype
(
generate_tuple
(
using
LowLengthsMagicDivisorShift
=
decltype
(
lambda_generate_magic_division_calculate_magic_shift
<
LowLengths
>
{},
Number
<
NDimLow
>
{}));
generate_tuple
(
lambda_merge_generate_magic_division_calculate_magic_shift
<
LowLengths
>
{},
Number
<
NDimLow
>
{}));
LowLengths
low_lengths_
;
LowLengths
low_lengths_
;
LowLengthsMagicDivisorMultipiler
low_lengths_magic_divisor_multiplier_
;
LowLengthsMagicDivisorMultipiler
low_lengths_magic_divisor_multiplier_
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment