diag_cpu.cpp 1.19 KB
Newer Older
rusty1s's avatar
matmul  
rusty1s committed
1
#include "diag_cpu.h"
rusty1s's avatar
rusty1s committed
2

rusty1s's avatar
matmul  
rusty1s committed
3
#include "utils.h"
rusty1s's avatar
rusty1s committed
4

rusty1s's avatar
matmul  
rusty1s committed
5
6
// Builds a boolean mask of length E + num_diag, where E = row.size(0) and
// num_diag is the number of elements on the k-th diagonal (col - row == k) of
// an M x N matrix.
//
// For every input entry i with coordinates (r, c) that does not lie on that
// diagonal, exactly one mask position is set to true: i shifted by the number
// of diagonal slots counted for the rows up to r (and including r when the
// entry sits to the right of the diagonal). Entries that lie exactly on the
// diagonal (c == r + k) set no flag. Positions left false are therefore the
// slots not claimed by any off-diagonal entry.
//
// Parameters:
//   row, col  CPU int64 tensors holding the coordinates of the entries.
//             Only row.size(0) is consulted for the entry count; col is
//             assumed to hold at least as many elements (not checked here).
//   M, N      number of rows / columns of the matrix.
//   k         diagonal offset; negative values select a sub-diagonal.
//
// Returns a bool tensor created with row's tensor options.
//
// NOTE(review): the index arithmetic presumably relies on the entries being
// sorted in row-major order -- confirm against the callers.
torch::Tensor non_diag_mask_cpu(torch::Tensor row, torch::Tensor col, int64_t M,
                                int64_t N, int64_t k) {
  CHECK_CPU(row);
  CHECK_CPU(col);

  // The raw pointers below are indexed with unit stride, so make sure the
  // underlying memory really is dense. This is a no-op for tensors that are
  // already contiguous.
  row = row.contiguous();
  col = col.contiguous();

  const int64_t E = row.size(0);

  // Length of the k-th diagonal. When the diagonal lies completely outside
  // the matrix (k < -M or k > N) the raw formula turns negative; clamp it to
  // zero so the mask never has fewer than E elements and the writes below can
  // never land outside the buffer.
  int64_t num_diag = k < 0 ? std::min(M + k, N) : std::min(M, N - k);
  if (num_diag < 0) {
    num_diag = 0;
  }

  const int64_t *row_data = row.data_ptr<int64_t>();
  const int64_t *col_data = col.data_ptr<int64_t>();

  auto mask = torch::zeros(E + num_diag, row.options().dtype(torch::kBool));
  bool *mask_data = mask.data_ptr<bool>();

  if (k < 0) {
    // Sub-diagonal: the first -k rows carry no diagonal element, so the
    // number of diagonal slots preceding row r is r + k.
    for (int64_t i = 0; i < E; i++) {
      const int64_t r = row_data[i];
      const int64_t c = col_data[i];
      const int64_t diag_col = r + k; // column of the diagonal in row r

      if (diag_col < 0) {
        // Row lies above the first row that holds a diagonal element.
        mask_data[i] = true;
      } else if (diag_col >= N) {
        // Row lies below the last diagonal element: all slots precede it.
        mask_data[i + num_diag] = true;
      } else if (diag_col > c) {
        // Entry sits left of the diagonal element of its row.
        mask_data[i + diag_col] = true;
      } else if (diag_col < c) {
        // Entry sits right of the diagonal element of its row.
        mask_data[i + diag_col + 1] = true;
      }
    }
  } else {
    // Main or super-diagonal: every row starting at 0 carries a diagonal
    // element until the diagonal leaves the matrix (r + k >= N).
    for (int64_t i = 0; i < E; i++) {
      const int64_t r = row_data[i];
      const int64_t c = col_data[i];
      const int64_t diag_col = r + k; // column of the diagonal in row r

      if (diag_col >= N) {
        // Row has no diagonal element anymore: all slots precede it.
        mask_data[i + num_diag] = true;
      } else if (diag_col > c) {
        // Entry sits left of the diagonal element of its row.
        mask_data[i + r] = true;
      } else if (diag_col < c) {
        // Entry sits right of the diagonal element of its row.
        mask_data[i + r + 1] = true;
      }
    }
  }

  return mask;
}