reduction_common.hpp 1.76 KB
Newer Older
Chao Liu's avatar
Chao Liu committed
1
2
3
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.

Chao Liu's avatar
Chao Liu committed
4
#pragma once
5

Chao Liu's avatar
Chao Liu committed
6
#include "ck/utility/reduction_enums.hpp"
7

8
namespace ck {
9
10
11
12

struct float_equal_one
{
    template <class T>
13
    __host__ __device__ inline bool operator()(T x)
14
    {
15
        return x <= static_cast<T>(1.0f) and x >= static_cast<T>(1.0f);
16
17
18
19
20
21
    };
};

struct float_equal_zero
{
    template <class T>
22
    __host__ __device__ inline bool operator()(T x)
23
    {
24
        return x <= static_cast<T>(0.0f) and x >= static_cast<T>(0.0f);
25
26
27
    };
};

28
29
30
31
32
33
34
35
36
37
38
39
// Compile-time count of how many times N can be halved (integer division) before
// it reaches 1, i.e. floor(log2(N)). For a power-of-two N this is the shift amount
// such that (1 << get_shift<N>()) == N.
//
// The recursion ends at the explicit specialization get_shift<1>().
template <index_t N>
static constexpr __device__ index_t get_shift()
{
    // For N <= 0 the sequence N, N/2, N/4, ... settles at 0 and never hits the
    // <1> specialization, so the function would recurse forever. Reject it up front.
    static_assert(N > 0, "get_shift<N>() requires N >= 1");

    return (get_shift<N / 2>() + 1);
}

// Recursion anchor for get_shift<N>(): the value 1 needs no halving, so its shift is 0.
template <>
constexpr __device__ index_t get_shift<1>()
{
    return 0;
}

40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Sums `src` across the lanes of a wavefront using DPP cross-lane adds, then
// broadcasts the total back so that `src` holds the full sum in every lane on return.
//
// What the code below fixes in place:
//   - every arithmetic instruction is v_add_f32, so `src` is treated as a 32-bit
//     float regardless of T;
//   - the total is read from lane 63, i.e. a 64-lane wavefront is assumed.
// NOTE(review): presumably all 64 lanes must be active on entry — confirm with callers.
// NOTE(review): the body is AMD GCN assembly, so although the `__host__ __device__`
// qualifiers are kept for interface compatibility, this can only be instantiated for
// device code — confirm there is no host-side caller.
//
// Operand binding (this is the part that must stay in sync with the asm text):
//   %0  partial  VGPR scratch, write-only, early-clobber
//   %1  src      VGPR, read AND written  -> "+v" so the compiler sees the new value
//   %2  total    SGPR written by v_readlane_b32 -> "=s"
// Binding `src` and the scalar as plain inputs while the asm writes to them would
// leave the result invisible to the compiler (modifying an input-only operand is
// not permitted by the extended-asm contract).
template <typename T>
__host__ __device__ void waveReduceSum(T& src)
{
    T partial;     // %0: per-lane running partial sum
    index_t total; // %2: receives the raw 32-bit value of lane 63

    asm volatile(
        // Stage 1: within each row of 16 lanes, add the values of the three lower
        // neighbours. bound_ctrl:0 makes out-of-row sources read as zero.
        "v_add_f32 %0, %1, %1 row_shr:1 bound_ctrl:0\n"
        "v_add_f32 %0, %1, %0 row_shr:2 bound_ctrl:0\n"
        "v_add_f32 %0, %1, %0 row_shr:3 bound_ctrl:0\n"
        // The v_nop pairs separate a VALU write from the following DPP read of the
        // same register.
        "v_nop\n"
        "v_nop\n"
        // Stage 2: combine groups of 4, then groups of 8; afterwards lane 15 of each
        // row holds that row's total.
        "v_add_f32 %0, %0, %0 row_shr:4 bound_ctrl:0\n"
        "v_nop\n"
        "v_nop\n"
        "v_add_f32 %0, %0, %0 row_shr:8 bound_ctrl:0\n"
        "v_nop\n"
        "v_nop\n"
        // Stage 3: fold row totals across rows. row_mask:0xa updates rows 1 and 3
        // with the previous row's lane 15; row_mask:0xc updates rows 2 and 3 with
        // lane 31. Lane 63 then holds the wavefront total.
        "v_add_f32 %1, %0, %0 row_bcast:15 row_mask:0xa\n"
        "v_nop\n"
        "v_nop\n"
        "v_add_f32 %1, %1, %1 row_bcast:31 row_mask:0xc\n"
        "v_nop\n"
        "v_nop\n"
        // Stage 4: pull lane 63 into a scalar register and broadcast it to all lanes.
        "v_readlane_b32 %2, %1, 63\n"
        "v_nop\n"
        "v_nop\n"
        "v_mov_b32 %1, %2\n"
        : "=&v"(partial), "+v"(src), "=s"(total));

    // Scratch outputs exist only to reserve registers for the asm.
    (void)partial;
    (void)total;
}

Chao Liu's avatar
Chao Liu committed
74
} // namespace ck