reduce.h 1.87 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#ifndef __INFINIOP_REDUCE_CPU_H__
#define __INFINIOP_REDUCE_CPU_H__
#include "../../../utils.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>

#ifdef ENABLE_OMP
#include <omp.h>
#endif

#include <type_traits>

namespace op::common_cpu {

namespace reduce_op {

// Trait whose ::value is true when T is exactly one of Candidates...
// (no cv/ref stripping is applied; std::is_same compares the types as given).
template <typename T, typename... Candidates>
using IsAnyOf = std::disjunction<std::is_same<T, Candidates>...>;

// Element types for which the generic reductions below are enabled. For
// these types the reduction result has the same type as the elements.
template <typename T>
using ReduceToSame = IsAnyOf<T,
                             float, double,
                             uint8_t, int8_t,
                             uint16_t, int16_t,
                             uint32_t, int32_t,
                             uint64_t, int64_t>;

// Adds up `len` elements read from `data` at a fixed element stride.
//
// Enabled only for the element types accepted by ReduceToSame. The
// accumulator has type T, so the result is not widened: it is subject to the
// range and precision of T itself.
//
// data   - pointer to the first element to read
// len    - number of elements to accumulate
// stride - distance, in elements, between consecutive reads (default 1)
//
// Returns the accumulated sum, or 0 when `len` is 0.
template <typename T, typename = std::enable_if_t<ReduceToSame<T>::value>>
T sum(const T *data, size_t len, ptrdiff_t stride = 1) {
    T result = 0;
    for (size_t i = 0; i < len; i++) {
        // Compute the offset in signed arithmetic. Multiplying the unsigned
        // counter by `stride` directly would typically convert a negative
        // stride to a huge unsigned value before the multiplication.
        const ptrdiff_t offset = static_cast<ptrdiff_t>(i) * stride;
        result += data[offset];
    }

    return result;
}

39
float sum(const fp16_t *data, size_t len, ptrdiff_t stride = 1);
40
float sum(const bf16_t *data, size_t len, ptrdiff_t stride = 1);
41
42
43
44
45
46

// Returns the largest of `len` elements read from `data` at a fixed element
// stride.
//
// Enabled only for the element types accepted by ReduceToSame; the result has
// the same type as the elements.
//
// data   - pointer to the first element to read
// len    - number of elements to examine; must be at least 1, because
//          data[0] is read unconditionally to seed the result
// stride - distance, in elements, between consecutive reads (default 1)
template <typename T, typename = std::enable_if_t<ReduceToSame<T>::value>>
T max(const T *data, size_t len, ptrdiff_t stride = 1) {
    T result = data[0];
    for (size_t i = 1; i < len; i++) {
        // Compute the offset in signed arithmetic. Multiplying the unsigned
        // counter by `stride` directly would typically convert a negative
        // stride to a huge unsigned value before the multiplication.
        const ptrdiff_t offset = static_cast<ptrdiff_t>(i) * stride;
        result = std::max(result, data[offset]);
    }

    return result;
}

52
float max(const fp16_t *data, size_t len, ptrdiff_t stride = 1);
53
float max(const bf16_t *data, size_t len, ptrdiff_t stride = 1);
54

55
56
57
58
59
60
61
62
63
64
65
// Adds up the squares of `len` elements read from `data` at a fixed element
// stride.
//
// Enabled only for the element types accepted by ReduceToSame. The
// accumulator has type T, so the result is not widened: it is subject to the
// range and precision of T itself.
//
// data   - pointer to the first element to read
// len    - number of elements to accumulate
// stride - distance, in elements, between consecutive reads (default 1)
//
// Returns the accumulated sum of squares, or 0 when `len` is 0.
template <typename T, typename = std::enable_if_t<ReduceToSame<T>::value>>
T sumSquared(const T *data, size_t len, ptrdiff_t stride = 1) {
    T result = 0;
    for (size_t i = 0; i < len; i++) {
        // Compute the offset in signed arithmetic. Multiplying the unsigned
        // counter by `stride` directly would typically convert a negative
        // stride to a huge unsigned value before the multiplication.
        const ptrdiff_t offset = static_cast<ptrdiff_t>(i) * stride;
        T val = data[offset];
        result += val * val;
    }

    return result;
}

66
float sumSquared(const fp16_t *data, size_t len, ptrdiff_t stride = 1);
67
float sumSquared(const bf16_t *data, size_t len, ptrdiff_t stride = 1);
68
69
70
71
72
73

} // namespace reduce_op

} // namespace op::common_cpu

#endif //__INFINIOP_REDUCE_CPU_H__