Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
22995e9a
"vscode:/vscode.git/clone" did not exist on "da8c060873fb9120004627b2d3963154e1f8a68c"
Commit
22995e9a
authored
May 14, 2024
by
Adam Osewski
Browse files
Use builtin to only sync threads, not LDS.
parent
2541812e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
19 deletions
+6
-19
include/ck/utility/workgroup_barrier.hpp
include/ck/utility/workgroup_barrier.hpp
+6
-19
No files found.
include/ck/utility/workgroup_barrier.hpp
View file @
22995e9a
...
@@ -9,26 +9,13 @@ struct workgroup_barrier
...
@@ -9,26 +9,13 @@ struct workgroup_barrier
__device__
uint32_t
ld
(
uint32_t
offset
)
const
__device__
uint32_t
ld
(
uint32_t
offset
)
const
{
{
#if 0
float d = llvm_amdgcn_raw_buffer_load_fp32(
amdgcn_make_buffer_resource(base_ptr),
0,
offset,
AMDGCN_BUFFER_GLC);
union cvt {
float f32;
uint32_t u32;
};
cvt x;
x.f32 = d;
return x.u32;
#endif
return
__atomic_load_n
(
base_ptr
+
offset
,
__ATOMIC_RELAXED
);
return
__atomic_load_n
(
base_ptr
+
offset
,
__ATOMIC_RELAXED
);
}
}
__device__
void
st
(
uint32_t
offset
,
uint32_t
value
)
__device__
void
st
(
uint32_t
offset
,
uint32_t
value
)
{
{
__atomic_store_n
(
base_ptr
+
offset
,
value
,
__ATOMIC_RELEASE
);
__atomic_store_n
(
base_ptr
+
offset
,
value
,
__ATOMIC_RELEASE
);
// __atomic_store_n(base_ptr + offset, value, __ATOMIC_SEQ_CST);
}
}
__device__
void
wait_eq
(
uint32_t
offset
,
uint32_t
value
)
__device__
void
wait_eq
(
uint32_t
offset
,
uint32_t
value
)
...
@@ -37,7 +24,7 @@ struct workgroup_barrier
...
@@ -37,7 +24,7 @@ struct workgroup_barrier
{
{
while
(
ld
(
offset
)
!=
value
)
{}
while
(
ld
(
offset
)
!=
value
)
{}
}
}
__
syncthreads
();
__
builtin_amdgcn_s_barrier
();
}
}
__device__
void
wait_lt
(
uint32_t
offset
,
uint32_t
value
)
__device__
void
wait_lt
(
uint32_t
offset
,
uint32_t
value
)
...
@@ -46,7 +33,7 @@ struct workgroup_barrier
...
@@ -46,7 +33,7 @@ struct workgroup_barrier
{
{
while
(
ld
(
offset
)
<
value
)
{}
while
(
ld
(
offset
)
<
value
)
{}
}
}
__
syncthreads
();
__
builtin_amdgcn_s_barrier
();
}
}
__device__
void
wait_set
(
uint32_t
offset
,
uint32_t
compare
,
uint32_t
value
)
__device__
void
wait_set
(
uint32_t
offset
,
uint32_t
compare
,
uint32_t
value
)
...
@@ -55,7 +42,7 @@ struct workgroup_barrier
...
@@ -55,7 +42,7 @@ struct workgroup_barrier
{
{
while
(
atomicCAS
(
base_ptr
+
offset
,
compare
,
value
)
!=
compare
)
{}
while
(
atomicCAS
(
base_ptr
+
offset
,
compare
,
value
)
!=
compare
)
{}
}
}
__
syncthreads
();
__
builtin_amdgcn_s_barrier
();
}
}
// enter critical zone, assume buffer is zero when launch kernel
// enter critical zone, assume buffer is zero when launch kernel
...
@@ -66,20 +53,20 @@ struct workgroup_barrier
...
@@ -66,20 +53,20 @@ struct workgroup_barrier
__device__
void
inc
(
uint32_t
offset
)
__device__
void
inc
(
uint32_t
offset
)
{
{
__builtin_amdgcn_s_barrier
();
if
(
threadIdx
.
x
==
0
)
if
(
threadIdx
.
x
==
0
)
{
{
atomicAdd
(
base_ptr
+
offset
,
1
);
atomicAdd
(
base_ptr
+
offset
,
1
);
}
}
__syncthreads
();
}
}
__device__
void
reset
(
uint32_t
offset
)
__device__
void
reset
(
uint32_t
offset
)
{
{
__builtin_amdgcn_s_barrier
();
if
(
threadIdx
.
x
==
0
)
if
(
threadIdx
.
x
==
0
)
{
{
st
(
offset
,
0
);
st
(
offset
,
0
);
}
}
__syncthreads
();
}
}
uint32_t
*
base_ptr
;
uint32_t
*
base_ptr
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment