Commit e06d55f2 authored by peastman's avatar peastman
Browse files

Fixed bug in RPMD contractions

parent fa6025df
...@@ -87,6 +87,7 @@ vector<string> RPMDIntegrator::getKernelNames() { ...@@ -87,6 +87,7 @@ vector<string> RPMDIntegrator::getKernelNames() {
void RPMDIntegrator::setPositions(int copy, const vector<Vec3>& positions) { void RPMDIntegrator::setPositions(int copy, const vector<Vec3>& positions) {
kernel.getAs<IntegrateRPMDStepKernel>().setPositions(copy, positions); kernel.getAs<IntegrateRPMDStepKernel>().setPositions(copy, positions);
forcesAreValid = false;
hasSetPosition = true; hasSetPosition = true;
} }
......
...@@ -23,13 +23,16 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) { ...@@ -23,13 +23,16 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
const int indexInBlock = threadIdx.x-blockStart; const int indexInBlock = threadIdx.x-blockStart;
__shared__ mixed3 q[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 q[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 temp[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
__shared__ mixed2 w[NUM_COPIES]; __shared__ mixed2 w1[NUM_COPIES];
__shared__ mixed2 w2[NUM_CONTRACTED_COPIES];
mixed3* qreal = &q[blockStart]; mixed3* qreal = &q[blockStart];
mixed3* qimag = &q[blockStart+blockDim.x]; mixed3* qimag = &q[blockStart+blockDim.x];
mixed3* tempreal = &temp[blockStart]; mixed3* tempreal = &temp[blockStart];
mixed3* tempimag = &temp[blockStart+blockDim.x]; mixed3* tempimag = &temp[blockStart+blockDim.x];
if (threadIdx.x < NUM_COPIES) if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w1[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (threadIdx.x < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
__syncthreads(); __syncthreads();
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the particle position. // Load the particle position.
...@@ -41,6 +44,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) { ...@@ -41,6 +44,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
// Forward FFT. // Forward FFT.
__syncthreads(); __syncthreads();
mixed2* w = w1;
FFT_Q_FORWARD FFT_Q_FORWARD
if (NUM_CONTRACTED_COPIES > 1) { if (NUM_CONTRACTED_COPIES > 1) {
// Compress the data to remove high frequencies. // Compress the data to remove high frequencies.
...@@ -54,6 +58,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) { ...@@ -54,6 +58,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)]; qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
} }
__syncthreads(); __syncthreads();
w = w2;
FFT_Q_BACKWARD FFT_Q_BACKWARD
} }
...@@ -74,13 +79,16 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte ...@@ -74,13 +79,16 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
const mixed forceScale = 1/(mixed) 0x100000000; const mixed forceScale = 1/(mixed) 0x100000000;
__shared__ mixed3 f[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 f[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 temp[2*THREAD_BLOCK_SIZE]; __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
__shared__ mixed2 w[NUM_COPIES]; __shared__ mixed2 w1[NUM_COPIES];
__shared__ mixed2 w2[NUM_CONTRACTED_COPIES];
mixed3* freal = &f[blockStart]; mixed3* freal = &f[blockStart];
mixed3* fimag = &f[blockStart+blockDim.x]; mixed3* fimag = &f[blockStart+blockDim.x];
mixed3* tempreal = &temp[blockStart]; mixed3* tempreal = &temp[blockStart];
mixed3* tempimag = &temp[blockStart+blockDim.x]; mixed3* tempimag = &temp[blockStart+blockDim.x];
if (threadIdx.x < NUM_COPIES) if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w1[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (threadIdx.x < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
__syncthreads(); __syncthreads();
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the force. // Load the force.
...@@ -94,6 +102,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte ...@@ -94,6 +102,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
// Forward FFT. // Forward FFT.
mixed2* w = w2;
if (NUM_CONTRACTED_COPIES > 1) { if (NUM_CONTRACTED_COPIES > 1) {
FFT_F_FORWARD FFT_F_FORWARD
} }
...@@ -110,6 +119,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte ...@@ -110,6 +119,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
fimag[indexInBlock] = (indexInBlock < end ? make_mixed3(0) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]); fimag[indexInBlock] = (indexInBlock < end ? make_mixed3(0) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
} }
__syncthreads(); __syncthreads();
w = w1;
FFT_F_BACKWARD FFT_F_BACKWARD
// Store results. // Store results.
......
...@@ -23,13 +23,16 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract ...@@ -23,13 +23,16 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
const int indexInBlock = get_local_id(0)-blockStart; const int indexInBlock = get_local_id(0)-blockStart;
__local mixed4 q[2*THREAD_BLOCK_SIZE]; __local mixed4 q[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE]; __local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES]; __local mixed2 w1[NUM_COPIES];
__local mixed2 w2[NUM_CONTRACTED_COPIES];
__local mixed4* qreal = &q[blockStart]; __local mixed4* qreal = &q[blockStart];
__local mixed4* qimag = &q[blockStart+get_local_size(0)]; __local mixed4* qimag = &q[blockStart+get_local_size(0)];
__local mixed4* tempreal = &temp[blockStart]; __local mixed4* tempreal = &temp[blockStart];
__local mixed4* tempimag = &temp[blockStart+get_local_size(0)]; __local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES) if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w1[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (get_local_id(0) < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the particle position. // Load the particle position.
...@@ -41,6 +44,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract ...@@ -41,6 +44,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
// Forward FFT. // Forward FFT.
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
__local mixed2* w = w1;
FFT_Q_FORWARD FFT_Q_FORWARD
if (NUM_CONTRACTED_COPIES > 1) { if (NUM_CONTRACTED_COPIES > 1) {
// Compress the data to remove high frequencies. // Compress the data to remove high frequencies.
...@@ -54,6 +58,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract ...@@ -54,6 +58,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)]; qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
w = w2;
FFT_Q_BACKWARD FFT_Q_BACKWARD
} }
...@@ -73,13 +78,16 @@ __kernel void contractForces(__global real4* force, __global real4* contracted) ...@@ -73,13 +78,16 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
const int indexInBlock = get_local_id(0)-blockStart; const int indexInBlock = get_local_id(0)-blockStart;
__local mixed4 f[2*THREAD_BLOCK_SIZE]; __local mixed4 f[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE]; __local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES]; __local mixed2 w1[NUM_COPIES];
__local mixed2 w2[NUM_CONTRACTED_COPIES];
__local mixed4* freal = &f[blockStart]; __local mixed4* freal = &f[blockStart];
__local mixed4* fimag = &f[blockStart+get_local_size(0)]; __local mixed4* fimag = &f[blockStart+get_local_size(0)];
__local mixed4* tempreal = &temp[blockStart]; __local mixed4* tempreal = &temp[blockStart];
__local mixed4* tempimag = &temp[blockStart+get_local_size(0)]; __local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES) if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); w1[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (get_local_id(0) < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the force. // Load the force.
...@@ -93,6 +101,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted) ...@@ -93,6 +101,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
// Forward FFT. // Forward FFT.
__local mixed2* w = w2;
if (NUM_CONTRACTED_COPIES > 1) { if (NUM_CONTRACTED_COPIES > 1) {
FFT_F_FORWARD FFT_F_FORWARD
} }
...@@ -109,6 +118,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted) ...@@ -109,6 +118,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
fimag[indexInBlock] = (indexInBlock < end ? (mixed4) (0.0f, 0.0f, 0.0f, 0.0f) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]); fimag[indexInBlock] = (indexInBlock < end ? (mixed4) (0.0f, 0.0f, 0.0f, 0.0f) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
w = w1;
FFT_F_BACKWARD FFT_F_BACKWARD
// Store results. // Store results.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment