Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
91e0de2e
Commit
91e0de2e
authored
Jan 16, 2020
by
Chao Liu
Browse files
adding atomic add intrinsic
parent
b8385cca
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
59 additions
and
14 deletions
+59
-14
composable_kernel/include/utility/amd_buffer_addressing.hpp
composable_kernel/include/utility/amd_buffer_addressing.hpp
+40
-0
composable_kernel/include/utility/in_memory_operation.amd.hpp.in
...ble_kernel/include/utility/in_memory_operation.amd.hpp.in
+5
-0
driver/src/conv_bwd_data_driver.cpp
driver/src/conv_bwd_data_driver.cpp
+14
-14
No files found.
composable_kernel/include/utility/amd_buffer_addressing.hpp
View file @
91e0de2e
...
...
@@ -54,6 +54,13 @@ __device__ void __llvm_amdgcn_buffer_storex4(float4_t vdata,
bool
glc
,
bool
slc
)
__asm
(
"llvm.amdgcn.buffer.store.v4f32"
);
__device__
void
__llvm_amdgcn_buffer_atomic_add
(
float
vdata
,
int32x4_t
rsrc
,
index_t
vindex
,
index_t
offset
,
bool
slc
)
__asm
(
"llvm.amdgcn.buffer.atomic.fadd.f32"
);
// buffer_load requires:
// 1) p_src must be in global memory space, d_dst must be vgpr
// 2) p_src to be a block-invariant pointer.
...
...
@@ -73,6 +80,13 @@ amd_intrinsic_buffer_store(const typename vector_type<T, VectorSize>::MemoryType
index_t
dst_thread_data_offset
,
index_t
dst_const_data_offset
);
template
<
typename
T
,
index_t
VectorSize
>
__device__
void
amd_intrinsic_buffer_atomic_add
(
const
typename
vector_type
<
T
,
VectorSize
>::
MemoryType
&
src
,
T
*
p_dst_block
,
index_t
dst_thread_data_offset
,
index_t
dst_const_data_offset
);
template
<
>
__device__
float
amd_intrinsic_buffer_load
<
float
,
1
>
(
const
float
*
p_src_block
,
index_t
src_thread_data_offset
,
...
...
@@ -289,5 +303,31 @@ __device__ void amd_intrinsic_buffer_store<float, 4>(const float4_t& src,
#endif
}
template
<
>
__device__
void
amd_intrinsic_buffer_atomic_add
<
float
,
1
>
(
const
float
&
src
,
float
*
p_dst_block
,
index_t
dst_thread_data_offset
,
index_t
dst_const_data_offset
)
{
index_t
dst_thread_addr_offset
=
dst_thread_data_offset
*
sizeof
(
float
);
index_t
dst_const_addr_offset
=
dst_const_data_offset
*
sizeof
(
float
);
BufferLoadStoreDwordConfig
<
float
>
dst_block_config
;
// fill in byte 0 - 1
dst_block_config
.
address
[
0
]
=
p_dst_block
;
// fill in byte 2
dst_block_config
.
range
[
2
]
=
-
1
;
// fill in byte 3
dst_block_config
.
range
[
3
]
=
0x00027000
;
#if CK_USE_AMD_BUFFER_ADDRESSING_INTRINSIC
__llvm_amdgcn_buffer_atomic_add
(
src
,
dst_block_config
.
data
,
0
,
dst_thread_addr_offset
+
dst_const_addr_offset
,
false
);
#else
static_assert
(
false
,
" wrong! not implemented"
);
#endif
}
}
// namespace ck
#endif
composable_kernel/include/utility/in_memory_operation.amd.hpp.in
View file @
91e0de2e
...
...
@@ -52,8 +52,13 @@ __device__ void atomic_add_data(const T* p_src, index_t src_offset, T* p_dst, in
static_if<SrcAddressSpace == AddressSpace::vgpr && DstAddressSpace == AddressSpace::global>{}(
[&](auto) {
#if 1
atomicAdd(reinterpret_cast<vector_t*>(&p_dst[dst_offset]),
*reinterpret_cast<const vector_t*>(&p_src[src_offset]));
#else
amd_intrinsic_buffer_atomic_add<T, DataPerAccess>(
*reinterpret_cast<const vector_t*>(&p_src[src_offset]), p_dst, dst_offset, 0);
#endif
})
.Else([&](auto fwd) {
static_assert(fwd(false), "atomic_add doesn't support this memory space");
...
...
driver/src/conv_bwd_data_driver.cpp
View file @
91e0de2e
...
...
@@ -29,7 +29,7 @@ int main(int argc, char* argv[])
constexpr
index_t
C
=
128
;
constexpr
index_t
HI
=
35
;
constexpr
index_t
WI
=
35
;
constexpr
index_t
K
=
1
28
;
constexpr
index_t
K
=
1
024
;
constexpr
index_t
Y
=
3
;
constexpr
index_t
X
=
3
;
...
...
@@ -246,9 +246,9 @@ int main(int argc, char* argv[])
#endif
}
#if
0
#if
1
device_convolution_backward_data_implicit_gemm_v1r1_nchw_kcyx_nkhw
#elif
0
#elif
1
device_convolution_backward_data_implicit_gemm_v1r2_nchw_kcyx_nkhw
#elif 0
device_convolution_backward_data_implicit_gemm_v2r1_nchw_kcyx_nkhw
...
...
@@ -257,17 +257,17 @@ int main(int argc, char* argv[])
#else
device_convolution_backward_data_implicit_gemm_v4r1_nchw_kcyx_nkhw
#endif
(
in_nchw_desc
,
in_nchw_device
,
wei_kcyx_desc
,
wei_kcyx
,
out_nkhw_desc
,
out_nkhw
,
ConvStrides
{},
ConvDilations
{},
LeftPads
{},
RightPads
{},
nrepeat
);
(
in_nchw_desc
,
in_nchw_device
,
wei_kcyx_desc
,
wei_kcyx
,
out_nkhw_desc
,
out_nkhw
,
ConvStrides
{},
ConvDilations
{},
LeftPads
{},
RightPads
{},
nrepeat
);
if
(
do_verification
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment