// diag.cpp — CPU kernel computing a "non-diagonal" mask used when inserting
// k-th diagonal entries into a sparse COO tensor.
#include <torch/extension.h>

#include "compat.h"

#define CHECK_CPU(x) AT_ASSERTM(!x.type().is_cuda(), #x " must be CPU tensor")

// Builds a boolean mask of length E + num_diag for the edges of a sparse
// M x N matrix given in COO form. For each input edge i = (row[i], col[i])
// that does NOT lie on the k-th diagonal, the mask marks the position that
// edge will occupy once one entry per diagonal slot has been inserted into
// the (row-major sorted) edge list; on-diagonal edges leave their slot false.
//
//   row, col : CPU int64 tensors of equal length E (COO coordinates;
//              presumably sorted row-major — TODO confirm against callers).
//   M, N     : dense matrix dimensions.
//   k        : diagonal offset (0 = main diagonal, > 0 above, < 0 below).
//
// Returns a Bool tensor of shape [E + num_diag].
at::Tensor non_diag_mask(at::Tensor row, at::Tensor col, int64_t M, int64_t N,
                         int64_t k) {
  CHECK_CPU(row);
  CHECK_CPU(col);

  int64_t E = row.size(0);

  // Number of elements on the k-th diagonal of an M x N matrix.
  int64_t num_diag = k < 0 ? std::min(M + k, N) : std::min(M, N - k);

  auto row_data = row.DATA_PTR<int64_t>();
  auto col_data = col.DATA_PTR<int64_t>();

  // Output is zero-initialized: entries on the diagonal stay false.
  auto mask = at::zeros(E + num_diag, row.options().dtype(at::kBool));
  auto mask_data = mask.DATA_PTR<bool>();

  int64_t r, c;
  if (k < 0) {
    for (int64_t i = 0; i < E; i++) {
      r = row_data[i], c = col_data[i];
      // r + k is the column of the diagonal slot in row r (if any).
      if (r + k < 0) {
        // Row lies before the diagonal starts: no diagonal slot precedes i.
        mask_data[i] = true;
      } else if (r + k >= N) {
        // Row lies past the diagonal's end: all num_diag slots precede i.
        mask_data[i + num_diag] = true;
      } else if (r + k > c) {
        // Edge is left of its row's diagonal slot: r + k slots precede it.
        mask_data[i + r + k] = true;
      } else if (r + k < c) {
        // Edge is right of its row's diagonal slot: one more slot precedes it.
        mask_data[i + r + k + 1] = true;
      }
      // r + k == c: edge sits exactly on the diagonal -> mask stays false.
    }
  } else {
    // For k >= 0 the diagonal starts in row 0, so r + k < 0 cannot occur
    // (row indices are assumed non-negative) and the slot count in row r is
    // simply r (or r + 1 once past the slot).
    for (int64_t i = 0; i < E; i++) {
      r = row_data[i], c = col_data[i];
      if (r + k >= N) {
        mask_data[i + num_diag] = true;
      } else if (r + k > c) {
        mask_data[i + r] = true;
      } else if (r + k < c) {
        mask_data[i + r + 1] = true;
      }
    }
  }

  return mask;
}

// Python bindings: exposes non_diag_mask to Python under the extension's
// module name (TORCH_EXTENSION_NAME is supplied by the torch build system).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("non_diag_mask", &non_diag_mask, "Non-Diagonal Mask (CPU)");
}