workgroup_barrier.hpp 1.75 KB
Newer Older
1
2
3
4
5
#pragma once
#include <hip/hip_runtime.h>
#include <stdint.h>

namespace ck {
carlushuang's avatar
carlushuang committed
6
7
8
struct workgroup_barrier
{
    __device__ workgroup_barrier(uint32_t* ptr) : base_ptr(ptr) {}
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30

    __device__ uint32_t ld(uint32_t offset)
    {
#if 0
        float d = llvm_amdgcn_raw_buffer_load_fp32(
                        amdgcn_make_buffer_resource(base_ptr),
                        0,
                        offset,
                        AMDGCN_BUFFER_GLC);
        union cvt {
            float f32;
            uint32_t u32;
        };
        cvt x;
        x.f32 = d;
        return x.u32;
#endif
        return __atomic_load_n(base_ptr + offset, __ATOMIC_RELAXED);
    }

    __device__ void wait_eq(uint32_t offset, uint32_t value)
    {
carlushuang's avatar
carlushuang committed
31
32
33
        if(threadIdx.x == 0)
        {
            while(ld(offset) != value) {}
34
35
36
37
38
39
        }
        __syncthreads();
    }

    __device__ void wait_lt(uint32_t offset, uint32_t value)
    {
carlushuang's avatar
carlushuang committed
40
41
42
        if(threadIdx.x == 0)
        {
            while(ld(offset) < value) {}
43
44
45
46
47
48
        }
        __syncthreads();
    }

    __device__ void wait_set(uint32_t offset, uint32_t compare, uint32_t value)
    {
carlushuang's avatar
carlushuang committed
49
50
51
        if(threadIdx.x == 0)
        {
            while(atomicCAS(base_ptr + offset, compare, value) != compare) {}
52
53
54
55
56
        }
        __syncthreads();
    }

    // enter critical zoon, assume buffer is zero when launch kernel
carlushuang's avatar
carlushuang committed
57
    __device__ void aquire(uint32_t offset) { wait_set(offset, 0, 1); }
58
59

    // exit critical zoon, assume buffer is zero when launch kernel
carlushuang's avatar
carlushuang committed
60
    __device__ void release(uint32_t offset) { wait_set(offset, 1, 0); }
61
62
63
64

    __device__ void inc(uint32_t offset)
    {
        __syncthreads();
carlushuang's avatar
carlushuang committed
65
66
        if(threadIdx.x == 0)
        {
67
68
69
70
            atomicAdd(base_ptr + offset, 1);
        }
    }

carlushuang's avatar
carlushuang committed
71
    uint32_t* base_ptr;
72
};
carlushuang's avatar
carlushuang committed
73
} // namespace ck