Commit 5d199c86 authored by rusty1s
Browse files

half cuda bugfix

parent 186c0c93
...@@ -8,12 +8,9 @@ __global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size, ...@@ -8,12 +8,9 @@ __global__ void gridKernel(int64_t *self, TensorInfo<T> posInfo, T *size,
int64_t *count, ptrdiff_t nNodes) { int64_t *count, ptrdiff_t nNodes) {
KERNEL_LOOP(i, nNodes) { KERNEL_LOOP(i, nNodes) {
T *pos = posInfo.data + i * posInfo.stride[0]; T *pos = posInfo.data + i * posInfo.stride[0];
T c;
int64_t coef = 1, value = 0; int64_t coef = 1, value = 0;
for (ptrdiff_t d = 0; d < posInfo.size[1]; d += posInfo.stride[1]) { for (ptrdiff_t d = 0; d < posInfo.size[1]; d += posInfo.stride[1]) {
c = THCNumerics<T>::div(pos[d], size[d]); value += coef * ScalarConvert<T, int64_t>::to(THCNumerics<T>::div(pos[d], size[d]));
c = ScalarConvert<int64_t, T>::to(ScalarConvert<T, int64_t>::to(c));
value += coef * c;
coef *= count[d]; coef *= count[d];
} }
self[i] = value; self[i] = value;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment