Commit 6ed9c162 authored by rusty1s's avatar rusty1s
Browse files

impl floor with scalar convert

parent df57d3a5
......@@ -8,9 +8,12 @@ __global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
int64_t *count, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) {
T *pos = posInfo.data + i * posInfo.stride[0];
T c;
int64_t coef = 1, value = 0;
for (ptrdiff_t d = 0; d < posInfo.size[1]; d += posInfo.stride[1]) {
value += coef * THCNumerics<T>::floor(THCNumerics<T>::div(pos[d], size[d]));
c = THCNumerics<T>::div(pos[d], size[d]);
c = ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(c)); // floor.
value += coef * c;
coef *= count[d];
}
self[i] = value;
......
......@@ -17,7 +17,6 @@ template<typename T>
struct THCNumerics {
static inline __host__ __device__ T div(T a, T b) { return a / b; }
static inline __host__ __device__ bool gt(T a, T b) { return a > b; }
static inline __host__ __device__ int floor(T a) { return a; }
};
#ifdef CUDA_HALF_TENSOR
......@@ -25,7 +24,6 @@ template<>
struct THCNumerics<half> {
static inline __host__ __device__ half div(half a, half b) { return f2h(h2f(a) / h2f(b)); }
static inline __host__ __device__ bool gt(half a, half b) { return h2f(a) > h2f(b); }
static inline __host__ __device__ int floor(half a) { return (int) h2f(a); }
};
#endif // CUDA_HALF_TENSOR
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment