Commit 65abb5e3 authored by rusty1s

coalesce memory access

parent 9a9a511c
@@ -45,7 +45,9 @@ def spline_weighting_backward_input(grad_output, weight, basis,
                                     weight_index):  # pragma: no cover
     grad_input = grad_output.new(grad_output.size(0), weight.size(1))
     func = get_func('weighting_backward_input', grad_output)
+    weight = weight.transpose(1, 2).contiguous()
     func(grad_input, grad_output, weight, basis, weight_index)
+    weight = weight.transpose(1, 2).contiguous()
     return grad_input
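Note: the transpose before the kernel call presumably rewrites the weight buffer from [K, M_in, M_out] to [K, M_out, M_in] so that the backward-input kernel further down can read it with unit stride over m_in; the second transpose restores the original layout afterwards. A minimal sketch of the index arithmetic under that assumption (the helper names are illustrative, not part of this repository):

#include <stdint.h>

// Illustrative index arithmetic only; shapes assumed from the wrapper above:
// weight starts as [K, M_in, M_out] (row-major) and transpose(1, 2).contiguous()
// rewrites the buffer to [K, M_out, M_in].

__device__ __forceinline__ int64_t idxOriginal(int64_t k, int64_t m_in,
                                               int64_t m_out, int64_t M_in,
                                               int64_t M_out) {
  // Element (k, m_in, m_out): the stride over m_in is M_out, so threads with
  // consecutive m_in sit M_out elements apart.
  return k * M_in * M_out + m_in * M_out + m_out;
}

__device__ __forceinline__ int64_t idxTransposed(int64_t k, int64_t m_in,
                                                 int64_t m_out, int64_t M_in,
                                                 int64_t M_out) {
  // Element (k, m_out, m_in): the stride over m_in is 1, so threads with
  // consecutive m_in read consecutive addresses and the loads can coalesce.
  return k * M_in * M_out + m_out * M_in + m_in;
}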
@@ -17,13 +17,13 @@ __global__ void weightingForwardKernel(TensorInfo<Real> output, TensorInfo<Real>
   KERNEL_LOOP(i, n) {
     int64_t edgeOffset = i / output.size[1], inputOffset = edgeOffset * input.stride[0];
     int64_t s, S = basis.size[1], m_in, M_in = input.size[1], m_out = i % output.size[1], M_out = output.size[1], weightOffset;
-    Real b, value = 0;
+    Real value = 0;
     for (s = 0; s < S; s++) {
-      b = basis.data[edgeOffset * S + s];
       weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_out;
       for (m_in = 0; m_in < M_in; m_in++) {
-        value += b * weight.data[weightOffset + m_in * M_out] * input.data[inputOffset + m_in * input.stride[1]];
+        value += weight.data[weightOffset + m_in * M_out] * input.data[inputOffset + m_in * input.stride[1]];
       }
+      value *= basis.data[edgeOffset * S + s];
     }
     output.data[i] = value;
   }
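Note: in the forward kernel each thread handles one (edge, m_out) entry, so threads with consecutive i differ only in m_out; because weightOffset already contains m_out, the read weight.data[weightOffset + m_in * M_out] lands on consecutive addresses across a warp. A stripped-down, standalone sketch of that access pattern (illustrative names, not the repository's kernel):

#include <stdint.h>

// One thread per (edge, m_out) entry; threads with consecutive i differ in m_out.
// The weight buffer is assumed to be row-major [K, M_in, M_out], as in the
// forward kernel above.
template <typename Real>
__global__ void coalescedWeightReadSketch(Real *out, const Real *weight,
                                          const int64_t *weightIndex,
                                          int64_t M_in, int64_t M_out, int n) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    int64_t edge = i / M_out, m_out = i % M_out;
    int64_t weightOffset = weightIndex[edge] * M_in * M_out + m_out;
    Real value = 0;
    for (int64_t m_in = 0; m_in < M_in; m_in++) {
      // Adjacent threads (adjacent m_out) read adjacent addresses here, so the
      // warp's loads fall in one contiguous segment per m_in step and coalesce.
      value += weight[weightOffset + m_in * M_out];
    }
    out[i] = value;
  }
}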
@@ -32,15 +32,15 @@ __global__ void weightingForwardKernel(TensorInfo<Real> output, TensorInfo<Real>
 template<typename Real>
 __global__ void weightingBackwardInputKernel(TensorInfo<Real> gradInput, TensorInfo<Real> gradOutput, TensorInfo<Real> weight, TensorInfo<Real> basis, TensorInfo<int64_t> weightIndex, int n) {
   KERNEL_LOOP(i, n) {
-    int64_t edgeOffset = i / gradInput.size[1], gradOutputOffset = edgeOffset * M_out;
+    int64_t edgeOffset = i / gradInput.size[1], gradOutputOffset = edgeOffset * gradOutput.stride[0];
     int64_t s, S = basis.size[1], m_in = i % gradInput.size[1], M_in = gradInput.size[1], m_out, M_out = gradOutput.size[1], weightOffset;
-    Real b, value = 0;
+    Real value = 0;
     for (s = 0; s < S; s++) {
-      b = basis.data[edgeOffset * S + s];
-      weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out;
+      weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_in;
       for (m_out = 0; m_out < M_out; m_out++) {
-        value += b * weight.data[weightOffset + m_in * M_out + m_out] * gradOutput.data[gradOutputOffset + m_out];
+        value += weight.data[weightOffset + M_in * m_out] * gradOutput.data[gradOutputOffset + m_out];
       }
+      value *= basis.data[edgeOffset * S + s];
     }
     gradInput.data[i] = value;
   }
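Note: the backward-input kernel assigns one thread per (edge, m_in) entry, so neighbouring threads differ in m_in. With the original [K, M_in, M_out] layout the weight addresses for a fixed m_out were M_out elements apart from thread to thread; with the transposed layout passed in from the Python wrapper they are one element apart. A minimal sketch of just that read (standalone and illustrative, not the repository's code):

#include <stdint.h>

// One thread per (edge, m_in) entry; neighbouring threads differ in m_in.
// weightT is assumed to hold the transposed layout [K, M_out, M_in].
template <typename Real>
__global__ void backwardInputReadSketch(Real *out, const Real *weightT,
                                        const int64_t *weightIndex,
                                        int64_t M_in, int64_t M_out, int n) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += blockDim.x * gridDim.x) {
    int64_t edge = i / M_in, m_in = i % M_in;
    int64_t weightOffset = weightIndex[edge] * M_in * M_out + m_in;
    Real value = 0;
    for (int64_t m_out = 0; m_out < M_out; m_out++) {
      // Old layout would read weight[k * M_in * M_out + m_in * M_out + m_out]:
      // neighbouring threads sit M_out elements apart (strided, uncoalesced).
      // The transposed layout puts neighbouring threads one element apart, so
      // the warp's loads coalesce into far fewer memory transactions.
      value += weightT[weightOffset + M_in * m_out];
    }
    out[i] = value;
  }
}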
@@ -60,7 +60,7 @@ __global__ void weightingBackwardWeightKernel(TensorInfo<Real> gradWeight, Tenso
       b = basis.data[edgeOffset * S + s];
       weightOffset = weightIndex.data[edgeOffset * S + s] * M_in * M_out + m_out;
       for (m_in = 0; m_in < M_in; m_in++) {
-        atomicAdd(&gradWeight.data[weightOffset + m_in * M_out], b * value * input.data[inputOffset + m_in * input.stride[1]]);
+        gradWeight.data[weightOffset + m_in * M_out] += b * value * input.data[inputOffset + m_in * input.stride[1]];
       }
     }
   }