utils.hpp 3.08 KB
Newer Older
Li, Jiang's avatar
Li, Jiang committed
1
2
3
4
5
#ifndef UTILS_HPP
#define UTILS_HPP

#include <atomic>
#include <unistd.h>
6
#include <ATen/cpu/Utils.h>
Li, Jiang's avatar
Li, Jiang committed
7

8
#include "cpu/cpu_types.hpp"
Li, Jiang's avatar
Li, Jiang committed
9
10
11
12

namespace cpu_utils {
// Instruction-set selector. get_isa() maps the strings "amx" and "vec" onto
// these enumerators.
enum class ISA { AMX, VEC };

13
14
15
16
17
18
19
20
21
22
// Translates an ISA name into the matching ISA enumerator.
//
// Accepted spellings are exactly "amx" and "vec" (case-sensitive string
// comparison). Any other value fails a TORCH_CHECK whose message contains the
// rejected string.
inline ISA get_isa(const std::string& isa) {
  if (isa == "amx") return ISA::AMX;
  if (isa == "vec") return ISA::VEC;
  // Unknown name: report it instead of silently picking a default.
  TORCH_CHECK(false, "Invalid isa type: " + isa);
}

Li, Jiang's avatar
Li, Jiang committed
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// Compile-time map from a scalar element type T to the vec_op vector type
// exposed as the nested alias `vec_t`.
//
// Primary template: used for any T without an explicit specialization, in
// which case vec_t is `void`.
template <typename T>
struct VecTypeTrait {
  using vec_t = void;
};

// float -> vec_op::FP32Vec16
template <>
struct VecTypeTrait<float> {
  using vec_t = vec_op::FP32Vec16;
};

// c10::BFloat16 -> vec_op::BF16Vec16
template <>
struct VecTypeTrait<c10::BFloat16> {
  using vec_t = vec_op::BF16Vec16;
};

38
#if !defined(__powerpc__)
Li, Jiang's avatar
Li, Jiang committed
39
40
41
42
// c10::Half -> vec_op::FP16Vec16. This specialization sits inside the
// enclosing `#if !defined(__powerpc__)` guard, so it is not declared when
// __powerpc__ is defined.
template <>
struct VecTypeTrait<c10::Half> {
  using vec_t = vec_op::FP16Vec16;
};
43
#endif
Li, Jiang's avatar
Li, Jiang committed
44
45
46
47
48
49
50
51
52
53
54
55

// Atomic 64-bit ticket counter.
//
// acquire_counter() hands out consecutive values starting at 0;
// reset_counter() rewinds the sequence back to 0. Both use the default
// (sequentially consistent) memory ordering of std::atomic.
struct Counter {
  std::atomic<int64_t> counter;
  // 56 filler bytes after the 8-byte atomic.
  // NOTE(review): presumably pads the struct to a 64-byte cache line to limit
  // false sharing between adjacent counters; confirm against the allocation
  // site, since no alignment is requested here.
  char _padding[56];

  Counter() : counter{0} {}

  // Restarts the sequence at 0.
  void reset_counter() { counter = 0; }

  // Returns the current value and advances the counter by one, atomically.
  int64_t acquire_counter() { return counter.fetch_add(1); }
};

56
inline int64_t get_available_l2_size() {
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#if defined(__s390x__)
  static int64_t size = []() {
    uint32_t l2_cache_size = 0;
    auto caps = at::cpu::get_cpu_capabilities();
    auto it = caps.find("l2_cache_size");
    if (it != caps.end()) {
      l2_cache_size = static_cast<uint32_t>(it->second.toInt());
    }
    if (l2_cache_size == 0) {
      long sys_l2 = sysconf(_SC_LEVEL2_CACHE_SIZE);
      if (sys_l2 > 0) {
        l2_cache_size = static_cast<uint32_t>(sys_l2);
      }
    }
    if (l2_cache_size == 0) {
      l2_cache_size = 256 * 1024;
    }
    return static_cast<int64_t>(l2_cache_size) >> 1;  // use 50% of L2 cache
  }();
  return size;
#else
Li, Jiang's avatar
Li, Jiang committed
78
  static int64_t size = []() {
79
80
    auto caps = at::cpu::get_cpu_capabilities();
    const uint32_t l2_cache_size = caps.at("l2_cache_size").toInt();
Li, Jiang's avatar
Li, Jiang committed
81
82
83
    return l2_cache_size >> 1;  // use 50% of L2 cache
  }();
  return size;
84
#endif
Li, Jiang's avatar
Li, Jiang committed
85
}
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138

// Rounds `size` up to a multiple of the compile-time constant `alignment_v`.
//
// The arithmetic is carried out in T: (size + alignment - 1) / alignment,
// then multiplied back by the alignment. A value that is already a multiple
// is returned unchanged.
template <int32_t alignment_v, typename T>
inline T round_up(T size) {
  const T step = alignment_v;
  const T chunks = (size + step - 1) / step;
  return chunks * step;
}

// Rounds `size` down to a multiple of the compile-time constant
// `alignment_v`.
//
// The arithmetic is carried out in T: size / alignment, then multiplied back
// by the alignment. A value that is already a multiple is returned unchanged.
template <int32_t alignment_v, typename T>
inline T round_down(T size) {
  const T step = alignment_v;
  const T chunks = size / step;
  return chunks * step;
}

// Debug helper: prints a `row` x `col` window of the buffer at `ptr` to
// stdout.
//
// Output format: "<name>: [", then one text line per row with each element
// followed by ", ", then "]". Values are streamed with fixed notation and a
// precision of 5. Consecutive rows start `stride` elements apart. The text is
// assembled in memory first and emitted with a single printf call.
template <typename T>
inline void print_logits(const char* name, T* ptr, int32_t row, int32_t col,
                         int32_t stride) {
  std::stringstream out;
  out << std::fixed << std::setprecision(5) << name << ": [\n";

  T* row_start = ptr;
  for (int32_t r = 0; r < row; ++r, row_start += stride) {
    for (int32_t c = 0; c < col; ++c) {
      out << row_start[c] << ", ";
    }
    out << "\n";
  }

  out << "]\n";
  const std::string text = out.str();
  std::printf("%s", text.c_str());
}

// Holds one untyped buffer pointer (`ptr_`) together with a size (`size_`).
//
// Only get_data() and round() are defined in this header. The constructor,
// get_scratchpad_manager() and realloc() are declared here and defined
// elsewhere.
// NOTE(review): get_scratchpad_manager() looks like a shared-instance
// accessor and realloc() like a buffer resize; confirm against their
// definitions.
class ScratchPadManager {
 public:
  // Granularity used by round(): 4096 bytes.
  static constexpr size_t allocation_unit = 4 * 1024;  // 4KB

  ScratchPadManager();

  static ScratchPadManager* get_scratchpad_manager();

  // Rounds `size` up to a multiple of allocation_unit.
  static size_t round(size_t size) {
    const size_t units = (size + allocation_unit - 1) / allocation_unit;
    return units * allocation_unit;
  }

  // Returns the stored buffer pointer reinterpreted as T*. No size or
  // alignment check is performed here.
  template <typename T>
  T* get_data() {
    return reinterpret_cast<T*>(ptr_);
  }

  void realloc(size_t new_size);

 private:
  size_t size_;
  void* ptr_;
};
Li, Jiang's avatar
Li, Jiang committed
139
140
141
}  // namespace cpu_utils

#endif