add atomics

e85dcbdc · rusty1s · ab4d22e0 · e85dcbdc · e85dcbdc
Commit e85dcbdc authored Feb 29, 2020 by rusty1s
Show whitespace changes
Inline Side-by-side

Showing with 24 additions and 2 deletions

csrc/cuda/atomics.cuh csrc/cuda/atomics.cuh +21 -0

csrc/cuda/weighting_cuda.cu csrc/cuda/weighting_cuda.cu +3 -2

No files found.
--- a/csrc/cuda/atomics.cuh
+++ b/csrc/cuda/atomics.cuh
+#pragma once
+
+// Atomically adds `val` to the float stored at `address`.
+// Thin forwarding wrapper around the built-in atomicAdd; the value that the
+// built-in returns is discarded.
+__device__ static inline void atomAdd(float *address, float val)
+{
+  (void)atomicAdd(address, val);
+}
+// Double-precision overload of atomAdd. The preprocessor selects one of two
+// implementations:
+//   * device compilation with __CUDA_ARCH__ < 600 or CUDA_VERSION < 8000:
+//     a compare-and-swap retry loop on the 64-bit pattern of the double;
+//   * otherwise: forward to atomicAdd(double *, double).
+// NOTE(review): CUDA_VERSION is not defined in this header; this assumes an
+// earlier include provides it. An undefined macro evaluates to 0 inside #if,
+// which would select the CAS path during device compilation - TODO confirm.
+#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
+static inline __device__ void atomAdd(double *address, double val) {
+  // atomicCAS works on integer words, so view the double as a 64-bit integer.
+  unsigned long long int *address_as_ull = (unsigned long long int *)address;
+  unsigned long long int old = *address_as_ull;
+  unsigned long long int assumed;
+
+  do {
+    assumed = old;
+    // Store (assumed + val) only if the word still equals `assumed`;
+    // atomicCAS returns the word it actually found at the address.
+    old = atomicCAS(address_as_ull, assumed,
+                    __double_as_longlong(val + __longlong_as_double(assumed)));
+    // The comparison is done on the integer patterns (not on doubles), so a
+    // NaN payload cannot keep the loop spinning. A mismatch means the word
+    // changed before the swap, so retry with the freshly observed value.
+  } while (assumed != old);
+}
+#else
+static inline __device__ void atomAdd(double *address, double val) {
+  atomicAdd(address, val);
+}
+#endif
--- a/csrc/cuda/weighting_cuda.cu
+++ b/csrc/cuda/weighting_cuda.cu
@@ -2,6 +2,7 @@

 #include <ATen/cuda/CUDAContext.h>

+#include "atomics.cuh"
 #include "utils.cuh"

 #define THREADS 1024
@@ -154,7 +155,7 @@ __global__ void spline_weighting_bw_weight_kernel(

      for (int64_t m_in = 0; m_in < M_in; m_in++) {
        auto v = g * b * x[e * M_in + m_in];
-        atomicAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
+        atomAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
      }
    }
  }
@@ -217,7 +218,7 @@ __global__ void spline_weighting_bw_basis_kernel(
        const scalar_t w = weight[wi * M_in * M_out + m_in * M_out + m_out];
        v += g * w * x[e * M_in + m_in];
      }
-      atomicAdd(&grad_basis[e * S + s], v);
+      atomAdd(&grad_basis[e * S + s], v);
    }
  }
 }