Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
1e55a3b1
Commit
1e55a3b1
authored
Feb 20, 2021
by
Chao Liu
Browse files
add v_fmac inline asm
parent
2a87a973
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
24 additions
and
0 deletions
+24
-0
composable_kernel/include/utility/amd_inline_asm.hpp
composable_kernel/include/utility/amd_inline_asm.hpp
+20
-0
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+4
-0
No files found.
composable_kernel/include/utility/amd_inline_asm.hpp
View file @
1e55a3b1
...
...
@@ -8,18 +8,37 @@ namespace ck {
// outer-product: c[i,j] += inner_product(a[i], b[j])
// 1x2 case: one inline-asm statement issues two multiply-accumulate
// instructions, the first on (c0, a, b0) and the second on (c1, a, b1).
// c0 and c1 are both read (as the incoming accumulators) and written.
// The mnemonic is v_fmac_f32 when CK_USE_AMD_V_FMAC_F32 is non-zero and
// v_mac_f32 otherwise; operands and constraints are the same in both branches.
// NOTE(review): the outputs are declared "=v" without the early-clobber
// modifier although %0 is written before the second instruction reads %2 and
// %4 -- confirm the compiler can never place a or b1 in c0's register.
__device__
void
amd_assembly_outer_product_1x2
(
float
a
,
float
b0
,
float
b1
,
float
&
c0
,
float
&
c1
)
{
#if CK_USE_AMD_V_FMAC_F32
// Operand map: %0 = c0, %1 = c1, %2 = a, %3 = b0, %4 = b1.
asm
volatile
(
"
\n
\
v_fmac_f32 %0, %2, %3
\n
\
v_fmac_f32 %1, %2, %4
\n
\
"
// Outputs: the two accumulators.
:
"=v"
(
c0
),
"=v"
(
c1
)
// Inputs: a, b0, b1, then c0 and c1 again through the matching constraints
// "0" and "1", which tie each incoming accumulator to its output operand.
:
"v"
(
a
),
"v"
(
b0
),
"v"
(
b1
),
"0"
(
c0
),
"1"
(
c1
));
#else
// Same operands and constraints as above, using the v_mac_f32 mnemonic.
asm
volatile
(
"
\n
\
v_mac_f32 %0, %2, %3
\n
\
v_mac_f32 %1, %2, %4
\n
\
"
:
"=v"
(
c0
),
"=v"
(
c1
)
:
"v"
(
a
),
"v"
(
b0
),
"v"
(
b1
),
"0"
(
c0
),
"1"
(
c1
));
#endif
}
// outer-product: c[i,j] += inner_product(a[i], b[j])
__device__
void
amd_assembly_outer_product_1x4
(
float
a
,
float
b0
,
float
b1
,
float
b2
,
float
b3
,
float
&
c0
,
float
&
c1
,
float
&
c2
,
float
&
c3
)
{
#if CK_USE_AMD_V_FMAC_F32
asm
volatile
(
"
\n
\
v_fmac_f32 %0, %4, %5
\n
\
v_fmac_f32 %1, %4, %6
\n
\
v_fmac_f32 %2, %4, %7
\n
\
v_fmac_f32 %3, %4, %8
\n
\
"
:
"=v"
(
c0
),
"=v"
(
c1
),
"=v"
(
c2
),
"=v"
(
c3
)
:
"v"
(
a
),
"v"
(
b0
),
"v"
(
b1
),
"v"
(
b2
),
"v"
(
b3
),
"0"
(
c0
),
"1"
(
c1
),
"2"
(
c2
),
"3"
(
c3
));
#else
asm
volatile
(
"
\n
\
v_mac_f32 %0, %4, %5
\n
\
v_mac_f32 %1, %4, %6
\n
\
...
...
@@ -28,6 +47,7 @@ __device__ void amd_assembly_outer_product_1x4(
"
:
"=v"
(
c0
),
"=v"
(
c1
),
"=v"
(
c2
),
"=v"
(
c3
)
:
"v"
(
a
),
"v"
(
b0
),
"v"
(
b1
),
"v"
(
b2
),
"v"
(
b3
),
"0"
(
c0
),
"1"
(
c1
),
"2"
(
c2
),
"3"
(
c3
));
#endif
}
// outer-product: c[i,j] += inner_product(a[i], b[j])
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
1e55a3b1
...
...
@@ -26,6 +26,10 @@
#define CK_THREADWISE_GEMM_USE_AMD_INLINE_ASM 1
#endif
// Inline-asm outer products: emit v_fmac_f32 (1) or v_mac_f32 (0).
// A definition supplied before this point takes precedence over the default.
#if !defined(CK_USE_AMD_V_FMAC_F32)
#define CK_USE_AMD_V_FMAC_F32 1
#endif
// AMD buffer addressing
#ifndef CK_USE_AMD_BUFFER_ADDRESSING
#define CK_USE_AMD_BUFFER_ADDRESSING 1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment