Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
d8cf5e5a
"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "0b01a3468720fba73e6b001ea3ad0fa46fced255"
Commit
d8cf5e5a
authored
Jul 30, 2020
by
Chao Liu
Browse files
fix
parent
ec17a109
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
2 deletions
+8
-2
composable_kernel/include/utility/amd_inline_asm.hpp
composable_kernel/include/utility/amd_inline_asm.hpp
+4
-0
composable_kernel/include/utility/common_header.hpp
composable_kernel/include/utility/common_header.hpp
+1
-0
composable_kernel/include/utility/in_memory_operation.amd.hpp.in
...ble_kernel/include/utility/in_memory_operation.amd.hpp.in
+3
-2
No files found.
composable_kernel/include/utility/amd_inline_asm.hpp
View file @
d8cf5e5a
...
@@ -5,6 +5,7 @@
...
@@ -5,6 +5,7 @@
namespace
ck
{
namespace
ck
{
#if MIOPEN_USE_FP32
// outer-product: c[i,j] += inner_product(a[i], b[j])
// outer-product: c[i,j] += inner_product(a[i], b[j])
__device__
void
amd_assembly_outer_product_1x2
(
float
a
,
float
b0
,
float
b1
,
float
&
c0
,
float
&
c1
)
__device__
void
amd_assembly_outer_product_1x2
(
float
a
,
float
b0
,
float
b1
,
float
&
c0
,
float
&
c1
)
{
{
...
@@ -29,7 +30,9 @@ __device__ void amd_assembly_outer_product_1x4(
...
@@ -29,7 +30,9 @@ __device__ void amd_assembly_outer_product_1x4(
:
"=v"
(
c0
),
"=v"
(
c1
),
"=v"
(
c2
),
"=v"
(
c3
)
:
"=v"
(
c0
),
"=v"
(
c1
),
"=v"
(
c2
),
"=v"
(
c3
)
:
"v"
(
a
),
"v"
(
b0
),
"v"
(
b1
),
"v"
(
b2
),
"v"
(
b3
),
"0"
(
c0
),
"1"
(
c1
),
"2"
(
c2
),
"3"
(
c3
));
:
"v"
(
a
),
"v"
(
b0
),
"v"
(
b1
),
"v"
(
b2
),
"v"
(
b3
),
"0"
(
c0
),
"1"
(
c1
),
"2"
(
c2
),
"3"
(
c3
));
}
}
#endif
#if MIOPEN_USE_FP16
// outer-product: c[i,j] += inner_product(a[i], b[j])
// outer-product: c[i,j] += inner_product(a[i], b[j])
__device__
void
__device__
void
amd_assembly_outer_product_1x2
(
half2_t
a
,
half2_t
b0
,
half2_t
b1
,
float
&
c0
,
float
&
c1
)
amd_assembly_outer_product_1x2
(
half2_t
a
,
half2_t
b0
,
half2_t
b1
,
float
&
c0
,
float
&
c1
)
...
@@ -145,6 +148,7 @@ __device__ void amd_assembly_outer_product_1x4(half4_t a,
...
@@ -145,6 +148,7 @@ __device__ void amd_assembly_outer_product_1x4(half4_t a,
"2"
(
c2
),
"2"
(
c2
),
"3"
(
c3
));
// 3rd Src Acc registers for 2 half2 registers
"3"
(
c3
));
// 3rd Src Acc registers for 2 half2 registers
}
}
#endif
}
// namespace ck
}
// namespace ck
#endif
#endif
composable_kernel/include/utility/common_header.hpp
View file @
d8cf5e5a
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
#if CK_USE_AMD_XDLOPS
#if CK_USE_AMD_XDLOPS
#include "amd_xdlops.hpp"
#include "amd_xdlops.hpp"
#include "amd_xdlops_inline_asm.hpp"
#endif
#endif
#endif
#endif
composable_kernel/include/utility/in_memory_operation.amd.hpp.in
View file @
d8cf5e5a
...
@@ -145,8 +145,9 @@ struct AtomicAddData
...
@@ -145,8 +145,9 @@ struct AtomicAddData
template <>
template <>
__device__ void Run<AddressSpace::Vgpr, AddressSpace::Global>(const T* p_src,
__device__ void Run<AddressSpace::Vgpr, AddressSpace::Global>(const T* p_src,
index_t src_offset,
index_t src_offset,
bool src_valid,
index_t /* src_range */,
index_t /* src_range */,
bool src_valid
T* p_dst,
T* p_dst,
index_t dst_offset,
index_t dst_offset,
bool dst_valid,
bool dst_valid,
index_t dst_range) const
index_t dst_range) const
...
@@ -157,7 +158,7 @@ struct AtomicAddData
...
@@ -157,7 +158,7 @@ struct AtomicAddData
p_dst,
p_dst,
dst_offset,
dst_offset,
dst_valid,
dst_valid,
index_t
dst_range);
dst_range);
}
}
#endif
#endif
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment