Commit e06d55f2 authored by peastman's avatar peastman
Browse files

Fixed bug in RPMD contractions

parent fa6025df
......@@ -87,6 +87,7 @@ vector<string> RPMDIntegrator::getKernelNames() {
void RPMDIntegrator::setPositions(int copy, const vector<Vec3>& positions) {
kernel.getAs<IntegrateRPMDStepKernel>().setPositions(copy, positions);
forcesAreValid = false;
hasSetPosition = true;
}
......
......@@ -23,13 +23,16 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
const int indexInBlock = threadIdx.x-blockStart;
__shared__ mixed3 q[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
__shared__ mixed2 w[NUM_COPIES];
__shared__ mixed2 w1[NUM_COPIES];
__shared__ mixed2 w2[NUM_CONTRACTED_COPIES];
mixed3* qreal = &q[blockStart];
mixed3* qimag = &q[blockStart+blockDim.x];
mixed3* tempreal = &temp[blockStart];
mixed3* tempimag = &temp[blockStart+blockDim.x];
if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
w1[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (threadIdx.x < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
__syncthreads();
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the particle position.
......@@ -41,6 +44,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
// Forward FFT.
__syncthreads();
mixed2* w = w1;
FFT_Q_FORWARD
if (NUM_CONTRACTED_COPIES > 1) {
// Compress the data to remove high frequencies.
......@@ -54,6 +58,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
}
__syncthreads();
w = w2;
FFT_Q_BACKWARD
}
......@@ -74,13 +79,16 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
const mixed forceScale = 1/(mixed) 0x100000000;
__shared__ mixed3 f[2*THREAD_BLOCK_SIZE];
__shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
__shared__ mixed2 w[NUM_COPIES];
__shared__ mixed2 w1[NUM_COPIES];
__shared__ mixed2 w2[NUM_CONTRACTED_COPIES];
mixed3* freal = &f[blockStart];
mixed3* fimag = &f[blockStart+blockDim.x];
mixed3* tempreal = &temp[blockStart];
mixed3* tempimag = &temp[blockStart+blockDim.x];
if (threadIdx.x < NUM_COPIES)
w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
w1[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (threadIdx.x < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
__syncthreads();
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the force.
......@@ -94,6 +102,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
// Forward FFT.
mixed2* w = w2;
if (NUM_CONTRACTED_COPIES > 1) {
FFT_F_FORWARD
}
......@@ -110,6 +119,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
fimag[indexInBlock] = (indexInBlock < end ? make_mixed3(0) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
}
__syncthreads();
w = w1;
FFT_F_BACKWARD
// Store results.
......
......@@ -23,13 +23,16 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
const int indexInBlock = get_local_id(0)-blockStart;
__local mixed4 q[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES];
__local mixed2 w1[NUM_COPIES];
__local mixed2 w2[NUM_CONTRACTED_COPIES];
__local mixed4* qreal = &q[blockStart];
__local mixed4* qimag = &q[blockStart+get_local_size(0)];
__local mixed4* tempreal = &temp[blockStart];
__local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
w1[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (get_local_id(0) < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the particle position.
......@@ -41,6 +44,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
// Forward FFT.
barrier(CLK_LOCAL_MEM_FENCE);
__local mixed2* w = w1;
FFT_Q_FORWARD
if (NUM_CONTRACTED_COPIES > 1) {
// Compress the data to remove high frequencies.
......@@ -54,6 +58,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
}
barrier(CLK_LOCAL_MEM_FENCE);
w = w2;
FFT_Q_BACKWARD
}
......@@ -73,13 +78,16 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
const int indexInBlock = get_local_id(0)-blockStart;
__local mixed4 f[2*THREAD_BLOCK_SIZE];
__local mixed4 temp[2*THREAD_BLOCK_SIZE];
__local mixed2 w[NUM_COPIES];
__local mixed2 w1[NUM_COPIES];
__local mixed2 w2[NUM_CONTRACTED_COPIES];
__local mixed4* freal = &f[blockStart];
__local mixed4* fimag = &f[blockStart+get_local_size(0)];
__local mixed4* tempreal = &temp[blockStart];
__local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
if (get_local_id(0) < NUM_COPIES)
w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
w1[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
if (get_local_id(0) < NUM_CONTRACTED_COPIES)
w2[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
barrier(CLK_LOCAL_MEM_FENCE);
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
// Load the force.
......@@ -93,6 +101,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
// Forward FFT.
__local mixed2* w = w2;
if (NUM_CONTRACTED_COPIES > 1) {
FFT_F_FORWARD
}
......@@ -109,6 +118,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
fimag[indexInBlock] = (indexInBlock < end ? (mixed4) (0.0f, 0.0f, 0.0f, 0.0f) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
}
barrier(CLK_LOCAL_MEM_FENCE);
w = w1;
FFT_F_BACKWARD
// Store results.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment