#include "THCTensorInfo.cuh"

// NOTE(review): five further #include directives followed here, but their
// header names were stripped from this copy of the file. <vector> is the only
// one that can be recovered from the code below (std::vector is used in the
// wrapper declarations); restore the remaining ones from version control.
// One of the lost includes carried the remark:
//   "this is suboptimal, try forward declarations later"
#include <vector>

// Dimension selector passed as the third IndexToOffset template argument by the
// DEVICE_LINEAR_GET* macros below.
// NOTE(review): in THC, -2 conventionally selects the "contiguous tensor"
// IndexToOffset specialization -- confirm against THCTensorInfo.cuh.
#define Dims -2

// Read/write access to element INDEX (a linear index) of a TensorInfo-like
// object D_TENSOR. The macros expect `IndexType` (and `T` for the first one) to
// be names in scope at the expansion site.
// NOTE(review): the IndexToOffset template arguments were stripped from this
// copy; <T, IndexType, Dims> and <float, IndexType, Dims> are reconstructed
// from the THC IndexToOffset<T, IndexType, Dims> signature and the `_F` suffix.
// Confirm against version control.
#define DEVICE_LINEAR_GET(D_TENSOR, INDEX) D_TENSOR.data[IndexToOffset<T, IndexType, Dims>::get(INDEX, D_TENSOR)]
#define DEVICE_LINEAR_GET_F(D_TENSOR, INDEX) D_TENSOR.data[IndexToOffset<float, IndexType, Dims>::get(INDEX, D_TENSOR)]

// Disabled prototype kept for reference.
// NOTE(review): template parameters/arguments below are reconstructed.
// template <typename T, typename IndexType>
// void send_to_kernel(
//   TensorInfo<T, IndexType> Input_1,
//   TensorInfo<T, IndexType> Input_2,
//   IndexType totalElems
// );

// Index type used by the wrapper declarations in this header.
typedef int idxType;

// Declaration-only entry point for the forward pass; `call` is defined in
// another translation unit.
// NOTE(review): the template parameter list and the std::vector element type
// were stripped from this copy. The versions below are a best-effort
// reconstruction and MUST be checked against the definition of
// send_to_fwd_wrapper::call before merging.
struct send_to_fwd_wrapper
{
  template <typename DataType, typename AccumType, typename IndexType>
  static void call(std::vector<TensorInfo<void, idxType>>& tensors, int dim);
};

// Declaration-only entry point for the backward pass; same caveats as
// send_to_fwd_wrapper (reconstructed template parameters -- confirm).
struct send_to_bwd_wrapper
{
  template <typename DataType, typename AccumType, typename IndexType>
  static void call(std::vector<TensorInfo<void, idxType>>& tensors, int dim);
};

// Converts a scalar of type In to type Out. The primary template is a plain
// C-style cast and is usable from both host and device code.
template <typename In, typename Out>
struct ScalarConvert {
  static __host__ __device__ __forceinline__ Out to(const In v) { return (Out) v; }
};

#ifdef CUDA_HALF_TENSOR
// half -> Out: widen to float first, then cast. Device code uses the
// __half2float intrinsic; host code falls back to THC_half2float.
template <typename Out>
struct ScalarConvert<half, Out> {
  static __host__ __device__ __forceinline__ Out to(const half v) {
#ifdef __CUDA_ARCH__
    return (Out) __half2float(v);
#else
    return (Out) THC_half2float(v);
#endif
  }
};

// In -> half: cast to float first, then narrow. Device code uses the
// __float2half intrinsic; host code falls back to THC_float2half.
template <typename In>
struct ScalarConvert<In, half> {
  static __host__ __device__ __forceinline__ half to(const In v) {
#ifdef __CUDA_ARCH__
    return __float2half((float) v);
#else
    return THC_float2half((float) v);
#endif
  }
};

// half -> half: identity. This full specialization also resolves the ambiguity
// between the two partial specializations above.
template <>
struct ScalarConvert<half, half> {
  static __host__ __device__ __forceinline__ half to(const half v) {
    return v;
  }
};
#endif