Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
c6f26bb4
Unverified
Commit
c6f26bb4
authored
Aug 23, 2021
by
Chao Liu
Committed by
GitHub
Aug 23, 2021
Browse files
magic division use __umulhi() (#19)
parent
a2ad6d35
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
22 deletions
+3
-22
composable_kernel/include/utility/magic_division.hpp
composable_kernel/include/utility/magic_division.hpp
+3
-22
No files found.
composable_kernel/include/utility/magic_division.hpp
View file @
c6f26bb4
...
@@ -114,12 +114,11 @@ struct MagicDivision
...
@@ -114,12 +114,11 @@ struct MagicDivision
__host__
__device__
static
constexpr
uint32_t
__host__
__device__
static
constexpr
uint32_t
DoMagicDivision
(
uint32_t
dividend
,
uint32_t
multiplier
,
uint32_t
shift
)
DoMagicDivision
(
uint32_t
dividend
,
uint32_t
multiplier
,
uint32_t
shift
)
{
{
uint32_t
tmp
=
(
uint64_t
(
dividend
)
*
uint64_t
(
multiplier
)
)
>>
32
;
uint32_t
tmp
=
__umulhi
(
dividend
,
multiplier
);
return
(
tmp
+
dividend
)
>>
shift
;
return
(
tmp
+
dividend
)
>>
shift
;
}
}
#if 1 // debug
// magic division for int32_t
// HACK: magic division for int32_t
// HACK: use dividend_i32 as if it's uint32_t, dividend_i32 needs to be
// HACK: use dividend_i32 as if it's uint32_t, dividend_i32 needs to be
// non-negative for result to be correct
// non-negative for result to be correct
// TODO: figure out how to do magic number division for int32_t as dividend
// TODO: figure out how to do magic number division for int32_t as dividend
...
@@ -127,27 +126,9 @@ struct MagicDivision
...
@@ -127,27 +126,9 @@ struct MagicDivision
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
{
{
uint32_t
dividend_u32
=
as_type
<
uint32_t
>
(
dividend_i32
);
uint32_t
dividend_u32
=
as_type
<
uint32_t
>
(
dividend_i32
);
uint32_t
tmp
=
uint32_t
tmp
=
__umulhi
(
dividend_u32
,
multiplier
);
(
static_cast
<
uint64_t
>
(
dividend_u32
)
*
static_cast
<
uint64_t
>
(
multiplier
))
>>
32
;
return
(
tmp
+
dividend_u32
)
>>
shift
;
return
(
tmp
+
dividend_u32
)
>>
shift
;
}
}
#else
// the inline ASM produces a wrong result
__host__
__device__
static
int32_t
DoMagicDivision
(
int32_t
dividend_i32
,
uint32_t
multiplier
,
uint32_t
shift
)
{
uint32_t
r
;
asm
volatile
(
"
\n
\
v_mul_hi_u32 %0, %1, %2
\n
\
v_add_u32_e32 %0, %1, %0
\n
\
v_lshrrev_b32_e32 %0, %3, %0
\n
\
"
:
"=v"
(
r
)
:
"v"
(
as_type
<
uint32_t
>
(
dividend_i32
)),
"s"
(
multiplier
),
"s"
(
shift
));
return
as_type
<
int32_t
>
(
r
);
}
#endif
};
};
}
// namespace ck
}
// namespace ck
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment