kernel.cu 4.26 KB
Newer Older
rusty1s's avatar
typos  
rusty1s committed
1
#include <THC.h>
rusty1s's avatar
rusty1s committed
2

3
#include "kernel.h"
rusty1s's avatar
rusty1s committed
4

rusty1s's avatar
rusty1s committed
5
#include "common.cuh"
rusty1s's avatar
rusty1s committed
6
7
#include "THCIndex.cuh"
#include "THCAtomics.cuh"
rusty1s's avatar
rusty1s committed
8

rusty1s's avatar
rusty1s committed
9
10
#define scatter_(NAME) TH_CONCAT_4(scatter_, NAME, _kernel_, Real)
#define index_backward TH_CONCAT_2(index_backward_kernel_, Real)
rusty1s's avatar
rusty1s committed
11
12
13
14
15
#define thc_(NAME) TH_CONCAT_4(thc_, NAME, _, Real)

#include "generic/common.cu"
#include "THCGenerateAllTypes.h"

rusty1s's avatar
rusty1s committed
16
17
18
template<typename Real, int Dims>
__global__ void mulKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, const int dim, const int n) {
  KERNEL_LOOP(i, n) {
rusty1s's avatar
rusty1s committed
19
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0;
rusty1s's avatar
rusty1s committed
20
21
22
23
24
25
26
27
    IndexToScatterOffsets3<Real, Real, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset);
    atomicMul(&output.data[outputOffset], input.data[inputOffset]);
  }
}

template<typename Real, int Dims>
__global__ void divKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, const int dim, const int n) {
  KERNEL_LOOP(i, n) {
rusty1s's avatar
rusty1s committed
28
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0;
rusty1s's avatar
rusty1s committed
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
    IndexToScatterOffsets3<Real, Real, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset);
    atomicDiv(&output.data[outputOffset], input.data[inputOffset]);
  }
}

template<typename Real, int Dims>
__global__ void meanKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, TensorInfo<Real> count, const int dim, const int n) {
  KERNEL_LOOP(i, n) {
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0; int countOffset = 0;
    IndexToScatterOffsets4<Real, Real, Real, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset, count, &countOffset);
    atomicAdd(&output.data[outputOffset], input.data[inputOffset]);
    atomicAdd(&count.data[countOffset], 1);
  }
}

rusty1s's avatar
rusty1s committed
44
template<typename Real, int Dims>
rusty1s's avatar
rusty1s committed
45
46
__global__ void maxKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, const int dim, const int n) {
  KERNEL_LOOP(i, n) {
rusty1s's avatar
rusty1s committed
47
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0;
rusty1s's avatar
rusty1s committed
48
49
50
51
52
53
54
55
    IndexToScatterOffsets3<Real, Real, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset);
    atomicMax(&output.data[outputOffset], input.data[inputOffset]);
  }
}

template<typename Real, int Dims>
__global__ void minKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, const int dim, const int n) {
  KERNEL_LOOP(i, n) {
rusty1s's avatar
rusty1s committed
56
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0;
rusty1s's avatar
rusty1s committed
57
58
59
60
61
62
63
    IndexToScatterOffsets3<Real, Real, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset);
    atomicMin(&output.data[outputOffset], input.data[inputOffset]);
  }
}

template<typename Real, int Dims>
__global__ void argKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> input, TensorInfo<int64_t> arg, const int dim, const int n) {
rusty1s's avatar
rusty1s committed
64
  KERNEL_LOOP(i, n) {
rusty1s's avatar
rusty1s committed
65
66
    int outputOffset = 0; int indexOffset = 0; int inputOffset = 0; int argOffset = 0;
    IndexToScatterOffsets4<Real, Real, int64_t, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset, arg, &argOffset);
rusty1s's avatar
rusty1s committed
67
    if (input.data[inputOffset] == output.data[outputOffset]) arg.data[argOffset] = inputOffset % input.size[dim];
rusty1s's avatar
rusty1s committed
68
69
  }
}
rusty1s's avatar
max dim  
rusty1s committed
70

rusty1s's avatar
rusty1s committed
71
72
73
template<typename Real, int Dims>
__global__ void indexBackwardKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, TensorInfo<Real> grad, TensorInfo<int64_t> arg, const int dim, const int n) {
  KERNEL_LOOP(i, n) {
rusty1s's avatar
rusty1s committed
74
75
76
    int outputOffset = 0; int indexOffset = 0; int gradOffset = 0; int argOffset = 0;
    IndexToScatterOffsets4<Real, Real, int64_t, Dims>::compute(i, dim, index, &indexOffset, output, &outputOffset, grad, &gradOffset, arg, &argOffset);
    if (arg.data[argOffset] == outputOffset % output.size[dim]) output.data[outputOffset] = grad.data[gradOffset];
rusty1s's avatar
rusty1s committed
77
78
79
  }
}

rusty1s's avatar
rusty1s committed
80
#include "generic/kernel.cu"
rusty1s's avatar
rusty1s committed
81
82
83
84
85
86
87
88
89
90
91
92
93
#include "THCGenerateFloatType.h"
#include "generic/kernel.cu"
#include "THCGenerateDoubleType.h"
#include "generic/kernel.cu"
#include "THCGenerateByteType.h"
#include "generic/kernel.cu"
#include "THCGenerateCharType.h"
#include "generic/kernel.cu"
#include "THCGenerateShortType.h"
#include "generic/kernel.cu"
#include "THCGenerateIntType.h"
#include "generic/kernel.cu"
#include "THCGenerateLongType.h"