synchronization.hpp 1.04 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
// SPDX-License-Identifier: MIT
Illia Silin's avatar
Illia Silin committed
2
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
Chao Liu's avatar
Chao Liu committed
3

Chao Liu's avatar
Chao Liu committed
4
#pragma once
Chao Liu's avatar
Chao Liu committed
5

Chao Liu's avatar
Chao Liu committed
6
#include "ck/ck.hpp"
Chao Liu's avatar
Chao Liu committed
7
8
9
10
11

namespace ck {

__device__ void block_sync_lds()
{
12
#if CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM
13
14
15
16
17
18
19
#ifdef __gfx12__
    asm volatile("\
    s_wait_dscnt 0x0 \n \
    s_barrier_signal -1 \n \
    s_barrier_wait -1 \
    " ::);
#else
20
21
22
23
24
25
    // asm volatile("\
    // s_waitcnt lgkmcnt(0) \n \
    // s_barrier \
    // " ::);
    __builtin_amdgcn_s_waitcnt(0xc07f);
    __builtin_amdgcn_s_barrier();
26
#endif
Chao Liu's avatar
Chao Liu committed
27
#else
28
    __syncthreads();
Chao Liu's avatar
Chao Liu committed
29
30
#endif
}
31

32
33
__device__ void block_sync_lds_direct_load()
{
34
35
36
37
38
39
40
41
#ifdef __gfx12__
    asm volatile("\
    s_wait_vmcnt 0x0 \n \
    s_wait_dscnt 0x0 \n \
    s_barrier_signal -1 \n \
    s_barrier_wait -1 \
    " ::);
#else
42
43
44
45
46
    asm volatile("\
    s_waitcnt vmcnt(0) \n \
    s_waitcnt lgkmcnt(0) \n \
    s_barrier \
    " ::);
47
#endif
48
49
}

ltqin's avatar
ltqin committed
50
51
52
53
54
55
56
57
58
59
__device__ void s_nop()
{
#if 1
    asm volatile("\
    s_nop 0 \n \
    " ::);
#else
    __builtin_amdgcn_sched_barrier(0);
#endif
}
Chao Liu's avatar
Chao Liu committed
60
61

} // namespace ck