Fixed bug in RPMD contractions

e06d55f2 · peastman · fa6025df
Commit e06d55f2 authored Jun 30, 2015 by peastman
3 changed files
--- a/plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
+++ b/plugins/rpmd/openmmapi/src/RPMDIntegrator.cpp
@@ -87,6 +87,7 @@ vector<string> RPMDIntegrator::getKernelNames() {
 void RPMDIntegrator::setPositions(int copy, const vector<Vec3>& positions) {
    kernel.getAs<IntegrateRPMDStepKernel>().setPositions(copy, positions);
+    forcesAreValid = false;
    hasSetPosition = true;
 }

--- a/plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
+++ b/plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
@@ -23,13 +23,16 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
    const int indexInBlock = threadIdx.x-blockStart;
    __shared__ mixed3 q[2*THREAD_BLOCK_SIZE];
    __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
-    __shared__ mixed2 w[NUM_COPIES];
+    __shared__ mixed2 w1[NUM_COPIES];
+    __shared__ mixed2 w2[NUM_CONTRACTED_COPIES];
    mixed3* qreal = &q[blockStart];
    mixed3* qimag = &q[blockStart+blockDim.x];
    mixed3* tempreal = &temp[blockStart];
    mixed3* tempimag = &temp[blockStart+blockDim.x];
    if (threadIdx.x < NUM_COPIES)
-        w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+        w1[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+    if (threadIdx.x < NUM_CONTRACTED_COPIES)
+        w2[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
    __syncthreads();
    for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        // Load the particle position.
@@ -41,6 +44,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
        // Forward FFT.
        __syncthreads();
+        mixed2* w = w1;
        FFT_Q_FORWARD
        if (NUM_CONTRACTED_COPIES > 1) {
            // Compress the data to remove high frequencies.
@@ -54,6 +58,7 @@ extern "C" __global__ void contractPositions(mixed4* posq, mixed4* contracted) {
                qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
            }
            __syncthreads();
+            w = w2;
            FFT_Q_BACKWARD
        }
@@ -74,13 +79,16 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
    const mixed forceScale = 1/(mixed) 0x100000000;
    __shared__ mixed3 f[2*THREAD_BLOCK_SIZE];
    __shared__ mixed3 temp[2*THREAD_BLOCK_SIZE];
-    __shared__ mixed2 w[NUM_COPIES];
+    __shared__ mixed2 w1[NUM_COPIES];
+    __shared__ mixed2 w2[NUM_CONTRACTED_COPIES];
    mixed3* freal = &f[blockStart];
    mixed3* fimag = &f[blockStart+blockDim.x];
    mixed3* tempreal = &temp[blockStart];
    mixed3* tempimag = &temp[blockStart+blockDim.x];
    if (threadIdx.x < NUM_COPIES)
-        w[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+        w1[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+    if (threadIdx.x < NUM_CONTRACTED_COPIES)
+        w2[indexInBlock] = make_mixed2(cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
    __syncthreads();
    for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        // Load the force.
@@ -94,6 +102,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
        // Forward FFT.
+        mixed2* w = w2;
        if (NUM_CONTRACTED_COPIES > 1) {
            FFT_F_FORWARD
        }
@@ -110,6 +119,7 @@ extern "C" __global__ void contractForces(long long* force, long long* contracte
            fimag[indexInBlock] = (indexInBlock < end ? make_mixed3(0) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
        }
        __syncthreads();
+        w = w1;
        FFT_F_BACKWARD
        // Store results.

--- a/plugins/rpmd/platforms/opencl/src/kernels/rpmdContraction.cl
+++ b/plugins/rpmd/platforms/opencl/src/kernels/rpmdContraction.cl
@@ -23,13 +23,16 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
    const int indexInBlock = get_local_id(0)-blockStart;
    __local mixed4 q[2*THREAD_BLOCK_SIZE];
    __local mixed4 temp[2*THREAD_BLOCK_SIZE];
-    __local mixed2 w[NUM_COPIES];
+    __local mixed2 w1[NUM_COPIES];
+    __local mixed2 w2[NUM_CONTRACTED_COPIES];
    __local mixed4* qreal = &q[blockStart];
    __local mixed4* qimag = &q[blockStart+get_local_size(0)];
    __local mixed4* tempreal = &temp[blockStart];
    __local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
    if (get_local_id(0) < NUM_COPIES)
-        w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+        w1[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+    if (get_local_id(0) < NUM_CONTRACTED_COPIES)
+        w2[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
    barrier(CLK_LOCAL_MEM_FENCE);
    for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        // Load the particle position.
@@ -41,6 +44,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
        // Forward FFT.
        barrier(CLK_LOCAL_MEM_FENCE);
+        __local mixed2* w = w1;
        FFT_Q_FORWARD
        if (NUM_CONTRACTED_COPIES > 1) {
            // Compress the data to remove high frequencies.
@@ -54,6 +58,7 @@ __kernel void contractPositions(__global mixed4* posq, __global mixed4* contract
                qimag[indexInBlock] = tempimag[indexInBlock < start ? indexInBlock : indexInBlock+(NUM_COPIES-NUM_CONTRACTED_COPIES)];
            }
            barrier(CLK_LOCAL_MEM_FENCE);
+            w = w2;
            FFT_Q_BACKWARD
        }
@@ -73,13 +78,16 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
    const int indexInBlock = get_local_id(0)-blockStart;
    __local mixed4 f[2*THREAD_BLOCK_SIZE];
    __local mixed4 temp[2*THREAD_BLOCK_SIZE];
-    __local mixed2 w[NUM_COPIES];
+    __local mixed2 w1[NUM_COPIES];
+    __local mixed2 w2[NUM_CONTRACTED_COPIES];
    __local mixed4* freal = &f[blockStart];
    __local mixed4* fimag = &f[blockStart+get_local_size(0)];
    __local mixed4* tempreal = &temp[blockStart];
    __local mixed4* tempimag = &temp[blockStart+get_local_size(0)];
    if (get_local_id(0) < NUM_COPIES)
-        w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+        w1[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES));
+    if (get_local_id(0) < NUM_CONTRACTED_COPIES)
+        w2[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES), sin(-indexInBlock*2*M_PI/NUM_CONTRACTED_COPIES));
    barrier(CLK_LOCAL_MEM_FENCE);
    for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
        // Load the force.
@@ -93,6 +101,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
        // Forward FFT.
+        __local mixed2* w = w2;
        if (NUM_CONTRACTED_COPIES > 1) {
            FFT_F_FORWARD
        }
@@ -109,6 +118,7 @@ __kernel void contractForces(__global real4* force, __global real4* contracted)
            fimag[indexInBlock] = (indexInBlock < end ? (mixed4) (0.0f, 0.0f, 0.0f, 0.0f) : tempimag[indexInBlock-(NUM_COPIES-NUM_CONTRACTED_COPIES)]);
        }
        barrier(CLK_LOCAL_MEM_FENCE);
+        w = w1;
        FFT_F_BACKWARD
        // Store results.