common.cuh 1.04 KB
Newer Older
rusty1s's avatar
rusty1s committed
1
2
3
#ifndef THC_COMMON_INC
#define THC_COMMON_INC

rusty1s's avatar
rusty1s committed
4
5
// Builds a type-specific THC tensor function name by handing the pieces
// TH, CReal, Tensor_ and NAME to TH_CONCAT_4 (defined outside this header).
// NOTE(review): presumably CReal is set per scalar type by the THC generic
// include machinery, giving names of the form TH<CReal>Tensor_<NAME> - confirm.
#define THCTensor_(NAME) TH_CONCAT_4(TH,CReal,Tensor_,NAME)

rusty1s's avatar
rusty1s committed
6
// Grid-stride loop header for use inside a kernel: declares the index I,
// starts it at this thread's global x-index and advances it by the total
// number of threads in the grid until it reaches N.
//
//   I  name of the loop variable to declare (type ptrdiff_t)
//   N  exclusive upper bound; may be any expression, it is parenthesized here
//
// The start index and the stride are computed in ptrdiff_t so that the
// products of the 32-bit unsigned built-ins (blockIdx.x * blockDim.x and
// blockDim.x * gridDim.x) cannot wrap before being widened.
// Only the x dimension of the launch configuration is used.
#define KERNEL_LOOP(I, N) \
  for (ptrdiff_t I = static_cast<ptrdiff_t>(blockIdx.x) * blockDim.x + threadIdx.x; \
       I < (N); \
       I += static_cast<ptrdiff_t>(blockDim.x) * gridDim.x)
rusty1s's avatar
rusty1s committed
8

rusty1s's avatar
rusty1s committed
9
// Capacity of the fixed-size `size` and `stride` arrays in TensorInfo.
const int MAX_DIMS = 25;
rusty1s's avatar
rusty1s committed
10
11
// Threads per block used by the KERNEL_RUN / KERNEL_REAL_RUN launches and the
// divisor used by GET_BLOCKS when sizing the grid.
// Alternative value kept for reference:
/* const int NUM_THREADS = 1024; */
const int NUM_THREADS = 256;
rusty1s's avatar
rusty1s committed
12

rusty1s's avatar
rusty1s committed
13
// Returns the number of NUM_THREADS-sized blocks needed to cover N elements,
// i.e. N / NUM_THREADS rounded up.
//
// The rounding term is added in 64-bit arithmetic: in plain int,
// N + NUM_THREADS - 1 overflows (undefined behaviour) once N exceeds
// INT_MAX - NUM_THREADS + 1. The quotient always fits back into int.
//
// N == 0 yields 0. CUDA does not accept a grid dimension of 0, so callers
// should skip the launch for empty inputs.
inline int GET_BLOCKS(int N) {
  const long long padded = static_cast<long long>(N) + NUM_THREADS - 1;
  return static_cast<int>(padded / NUM_THREADS);
}

// Launches kernel NAME for N elements and checks the launch result.
//
//   NAME  kernel to launch
//   N     element count; sizes the grid via GET_BLOCKS and is passed to the
//         kernel as its last argument, after the variadic arguments
//   ...   leading kernel arguments (at least one is required)
//
// Expands to several statements in the caller's scope: it declares the locals
// `grid` and `stream`, and requires a variable named `state` to be visible at
// the expansion site. The launch uses NUM_THREADS threads per block, no
// dynamic shared memory, and the stream returned by THCState_getCurrentStream.
// The expansion has no trailing semicolon; the caller supplies it.
#define KERNEL_RUN(NAME, N, ...)                                  \
  int grid = GET_BLOCKS(N);                                       \
  cudaStream_t stream = THCState_getCurrentStream(state);         \
  NAME<<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N);         \
  THCudaCheck(cudaGetLastError())

// Same as KERNEL_RUN, but launches the kernel template NAME instantiated
// with `real` (NAME<real>).
//
//   NAME  kernel template taking one type argument
//   N     element count; sizes the grid via GET_BLOCKS and is passed to the
//         kernel as its last argument, after the variadic arguments
//   ...   leading kernel arguments (at least one is required)
//
// Expands to several statements in the caller's scope: it declares the locals
// `grid` and `stream`, and requires `state` and the type name `real` to be
// visible at the expansion site.
// NOTE(review): `real` is presumably supplied by the THC generic-type include
// machinery - confirm.
// The expansion has no trailing semicolon; the caller supplies it.
#define KERNEL_REAL_RUN(NAME, N, ...)                               \
  int grid = GET_BLOCKS(N);                                         \
  cudaStream_t stream = THCState_getCurrentStream(state);           \
  NAME<real><<<grid, NUM_THREADS, 0, stream>>>(__VA_ARGS__, N);     \
  THCudaCheck(cudaGetLastError())

rusty1s's avatar
rusty1s committed
29
30
31
32
33
34
35
36
// Plain-data description of a tensor with element type T: a data pointer
// plus per-dimension size and stride tables of fixed capacity MAX_DIMS.
// NOTE(review): presumably only the first `dims` entries of `size` and
// `stride` are meaningful, and strides are in elements - confirm against the
// code that fills this struct.
template<typename T>
struct TensorInfo {
  T *data;               // pointer to the tensor's elements
  int dims;              // dimension count
  int size[MAX_DIMS];    // per-dimension sizes
  int stride[MAX_DIMS];  // per-dimension strides
};

rusty1s's avatar
rusty1s committed
37
#include "generic/common.cuh"
rusty1s's avatar
rusty1s committed
38
#include "THC/THCGenerateAllTypes.h"
rusty1s's avatar
rusty1s committed
39
40

#endif  // THC_COMMON_INC