Commit e85dcbdc authored by rusty1s's avatar rusty1s
Browse files

add atomics

parent ab4d22e0
#pragma once
// Atomically adds `val` to the float stored at `address`.
//
// Thin device-side wrapper over the built-in atomicAdd so that callers can
// use one overloaded name (atomAdd) for both float and double operands.
// The previous value returned by atomicAdd is intentionally discarded.
static inline __device__ void atomAdd(float *address, float val) {
  (void)atomicAdd(address, val);
}
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
// Atomically adds `val` to the double stored at `address` using a
// compare-and-swap retry loop on the value's 64-bit representation.
//
// This overload is compiled only when the enclosing preprocessor guard
// holds (__CUDA_ARCH__ < 600 or CUDA_VERSION < 8000).
static inline __device__ void atomAdd(double *address, double val) {
  // View the double's storage as a 64-bit integer so atomicCAS can be used.
  unsigned long long int *bits =
      reinterpret_cast<unsigned long long int *>(address);
  unsigned long long int seen = *bits;

  for (;;) {
    // Snapshot the value the sum is based on.
    const unsigned long long int expected = seen;
    const double sum = val + __longlong_as_double(expected);

    // Store the sum only if the slot still holds `expected`; atomicCAS
    // returns whatever was actually in the slot.
    seen = atomicCAS(bits, expected, __double_as_longlong(sum));

    // The comparison is done on the raw bit patterns (integers), not on
    // doubles, so the loop also terminates when the stored value is NaN.
    if (seen == expected) {
      break;
    }
    // Otherwise another thread changed the slot first: retry with `seen`.
  }
}
#else
// Atomically adds `val` to the double stored at `address`.
//
// Compiled on the #else side of the guard above, where the call is forwarded
// directly to the built-in atomicAdd overload for double. The previous value
// returned by atomicAdd is intentionally discarded.
static inline __device__ void atomAdd(double *address, double val) {
  (void)atomicAdd(address, val);
}
......@@ -2,6 +2,7 @@
#include <ATen/cuda/CUDAContext.h>
#include "atomics.cuh"
#include "utils.cuh"
#define THREADS 1024
......@@ -154,7 +155,7 @@ __global__ void spline_weighting_bw_weight_kernel(
for (int64_t m_in = 0; m_in < M_in; m_in++) {
auto v = g * b * x[e * M_in + m_in];
atomicAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
atomAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
}
}
}
......@@ -217,7 +218,7 @@ __global__ void spline_weighting_bw_basis_kernel(
const scalar_t w = weight[wi * M_in * M_out + m_in * M_out + m_out];
v += g * w * x[e * M_in + m_in];
}
atomicAdd(&grad_basis[e * S + s], v);
atomAdd(&grad_basis[e * S + s], v);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment