Commit e419e153 authored by rusty1s

no half tensor type

parent a8bbb422
@@ -109,9 +109,6 @@ static inline __device__ void atomicAdd( double *address, double val) { AtomicA
 #elif !defined(__CUDA_ARCH__) && (CUDA_VERSION < 8000)
 static inline __device__ void atomicAdd( double *address, double val) {}
 #endif
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicAdd( half *address, half val) {}
-#endif
 
 #define OP(X, Y) Y * X
 ATOMIC_(Mul)
@@ -123,9 +120,6 @@ static inline __device__ void atomicMul(int32_t *address, int32_t val) { AtomicM
 static inline __device__ void atomicMul(int64_t *address, int64_t val) { AtomicMulIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicMul( float *address, float val) { AtomicMulDecimalImpl< float, sizeof( float)>()(address, val); }
 static inline __device__ void atomicMul( double *address, double val) { AtomicMulDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicMul( half *address, half val) {}
-#endif
 
 #define OP(X, Y) Y / X
 ATOMIC_(Div)
@@ -137,9 +131,6 @@ static inline __device__ void atomicDiv(int32_t *address, int32_t val) { AtomicD
 static inline __device__ void atomicDiv(int64_t *address, int64_t val) { AtomicDivIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicDiv( float *address, float val) { AtomicDivDecimalImpl< float, sizeof( float)>()(address, val); }
 static inline __device__ void atomicDiv( double *address, double val) { AtomicDivDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicDiv( half *address, half val) {}
-#endif
 
 #define OP(X, Y) max(Y, X)
 ATOMIC_(Max)
@@ -150,9 +141,6 @@ static inline __device__ void atomicMax(int16_t *address, int16_t val) { AtomicM
 static inline __device__ void atomicMax(int64_t *address, int64_t val) { AtomicMaxIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicMax( float *address, float val) { AtomicMaxDecimalImpl< float, sizeof( float)>()(address, val); }
 static inline __device__ void atomicMax( double *address, double val) { AtomicMaxDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicMax( half *address, half val) {}
-#endif
 
 #define OP(X, Y) min(Y, X)
 ATOMIC_(Min)
@@ -163,6 +151,3 @@ static inline __device__ void atomicMin(int16_t *address, int16_t val) { AtomicM
 static inline __device__ void atomicMin(int64_t *address, int64_t val) { AtomicMinIntegerImpl<int64_t, sizeof(int64_t)>()(address, val); }
 static inline __device__ void atomicMin( float *address, float val) { AtomicMinDecimalImpl< float, sizeof( float)>()(address, val); }
 static inline __device__ void atomicMin( double *address, double val) { AtomicMinDecimalImpl< double, sizeof( double)>()(address, val); }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ void atomicMin( half *address, half val) {}
-#endif
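
For context, the Decimal implementations referenced in these hunks (AtomicMulDecimalImpl, AtomicDivDecimalImpl, and so on) follow the usual CUDA recipe for atomics the hardware does not provide natively: a compare-and-swap loop on the value's raw bits. A minimal sketch for a 32-bit float multiply, assuming that pattern (an illustration, not the project's exact code):

static inline __device__ void atomicMulFloat(float *address, float val) {
  unsigned int *addr = (unsigned int *)address;
  unsigned int old = *addr, assumed;
  do {
    assumed = old;
    // Apply the op to the reinterpreted value; atomicCAS commits the result
    // only if *address still holds `assumed`, otherwise it returns the fresh
    // value and the loop retries.
    old = atomicCAS(addr, assumed,
                    __float_as_uint(val * __uint_as_float(assumed)));
  } while (assumed != old);
}
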
@@ -35,14 +35,3 @@ struct TensorInfo {
 } \
 THCudaCheck(cudaGetLastError()); \
 }
-
-static inline __device__ bool eq(uint8_t a, uint8_t b) { return a == b; }
-static inline __device__ bool eq( int8_t a, int8_t b) { return a == b; }
-static inline __device__ bool eq(int16_t a, int16_t b) { return a == b; }
-static inline __device__ bool eq(int32_t a, int32_t b) { return a == b; }
-static inline __device__ bool eq(int64_t a, int64_t b) { return a == b; }
-static inline __device__ bool eq( float a, float b) { return a == b; }
-static inline __device__ bool eq( double a, double b) { return a == b; }
-#ifdef CUDA_HALF_TENSOR
-static inline __device__ bool eq(half a, half b) { return __half2float(a) == __half2float(b); }
-#endif
@@ -64,7 +64,7 @@ __global__ void argKernel(TensorInfo<Real> output, TensorInfo<int64_t> index, Te
   KERNEL_LOOP(i, n) {
     int outputOffset = 0; int indexOffset = 0; int inputOffset = 0; int argOffset = 0;
     IndexToScatterOffsets4<Real, Real, int64_t, Dims>::compute(i, dim, index, &indexOffset, input, &inputOffset, output, &outputOffset, arg, &argOffset);
-    if (eq(input.data[inputOffset], output.data[outputOffset])) arg.data[argOffset] = inputOffset % input.size[dim];
+    if (input.data[inputOffset] == output.data[outputOffset]) arg.data[argOffset] = inputOffset % input.size[dim];
   }
 }
 
@@ -78,4 +78,16 @@ __global__ void indexBackwardKernel(TensorInfo<Real> output, TensorInfo<int64_t>
 }
 
 #include "generic/kernel.cu"
-#include "THCGenerateAllTypes.h"
+#include "THCGenerateFloatType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateDoubleType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateByteType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateCharType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateShortType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateIntType.h"
+#include "generic/kernel.cu"
+#include "THCGenerateLongType.h"