Commit 6ed9c162 authored by rusty1s's avatar rusty1s
Browse files

impl floor with scalar convert

parent df57d3a5
......@@ -8,9 +8,12 @@ __global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
int64_t *count, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) {
T *pos = posInfo.data + i * posInfo.stride[0];
T c;
int64_t coef = 1, value = 0;
for (ptrdiff_t d = 0; d < posInfo.size[1]; d += posInfo.stride[1]) {
value += coef * THCNumerics<T>::floor(THCNumerics<T>::div(pos[d], size[d]));
c = THCNumerics<T>::div(pos[d], size[d]);
c = ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(c)); // floor.
value += coef * c;
coef *= count[d];
}
self[i] = value;
......
......@@ -17,7 +17,6 @@ template<typename T>
struct THCNumerics {
static inline __host__ __device__ T div(T a, T b) { return a / b; }
static inline __host__ __device__ bool gt(T a, T b) { return a > b; }
static inline __host__ __device__ int floor(T a) { return a; }
};
#ifdef CUDA_HALF_TENSOR
......@@ -25,7 +24,6 @@ template<>
struct THCNumerics<half> {
static inline __host__ __device__ half div(half a, half b) { return f2h(h2f(a) / h2f(b)); }
static inline __host__ __device__ bool gt(half a, half b) { return h2f(a) > h2f(b); }
static inline __host__ __device__ int floor(half a) { return (int) h2f(a); }
};
#endif // CUDA_HALF_TENSOR
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment