Bug fixes to AMOEBA kernels

a5640f95 · Peter Eastman · 42ddf5c3 · a5640f95 · a5640f95 · a5640f95
Commit a5640f95 authored Sep 25, 2012 by Peter Eastman
3 changed files
--- a/plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
+++ b/plugins/amoeba/platforms/cuda2/src/kernels/multipoles.cu
@@ -410,15 +410,15 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
                // z-only
                forces[Z] = vector[UV]*dphi[V]/(norms[U]*angles[UV][1]);
-                forces[X] = make_float3(0);
+                forces[X] = make_real3(0);
-                forces[Y] = make_float3(0);
+                forces[Y] = make_real3(0);
                forces[I] = -forces[Z];
            }
            else {
-                forces[Z] = make_float3(0);
+                forces[Z] = make_real3(0);
-                forces[X] = make_float3(0);
+                forces[X] = make_real3(0);
-                forces[Y] = make_float3(0);
+                forces[Y] = make_real3(0);
-                forces[I] = make_float3(0);
+                forces[I] = make_real3(0);
            }
            // Store results
@@ -450,10 +450,11 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
        const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real4* __restrict__ points,
        real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
    extern __shared__ real4 localPosq[];
-    real3* localDipole = (real3*) &localPosq[NUM_ATOMS];
+    real3* localDipole = (real3*) &localPosq[blockDim.x];
-    real3* localInducedDipole = (real3*) &localDipole[NUM_ATOMS];
+    real3* localInducedDipole = (real3*) &localDipole[blockDim.x];
-    real* localQuadrupole = (real*) &localInducedDipole[NUM_ATOMS];
+    real* localQuadrupole = (real*) &localInducedDipole[blockDim.x];
-    for (int point = blockIdx.x*blockDim.x+threadIdx.x; point < numPoints; point += gridDim.x*blockDim.x) {
+    for (int basePoint = blockIdx.x*blockDim.x; basePoint < numPoints; basePoint += gridDim.x*blockDim.x) {
+        int point = basePoint+threadIdx.x;
        real4 pointPos = points[point];
        real p = 0;
        for (int baseAtom = 0; baseAtom < NUM_ATOMS; baseAtom += blockDim.x) {
@@ -475,6 +476,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
            // Loop over atoms and compute the potential at this point.
+            if (point < numPoints) {
                int end = min(blockDim.x, NUM_ATOMS-baseAtom);
                for (int i = 0; i < end; i++) {
                    real3 delta = trimTo3(localPosq[i]-pointPos);
@@ -498,6 +500,8 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
                    p += scq*rr5;
                }
            }
+            __syncthreads();
+        }
        potential[point] = p*ENERGY_SCALE_FACTOR;
    }
 }
--- a/plugins/amoeba/platforms/cuda2/src/kernels/pmeMultipoleElectrostatics.cu
+++ b/plugins/amoeba/platforms/cuda2/src/kernels/pmeMultipoleElectrostatics.cu
@@ -60,7 +60,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
 __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool hasExclusions, float dScale, float pScale, float mScale, float forceFactor,
                                      real& energy, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
-    float4 delta;
+    real4 delta;
    delta.x = atom2.pos.x - atom1.pos.x;
    delta.y = atom2.pos.y - atom1.pos.y;
    delta.z = atom2.pos.z - atom1.pos.z;
@@ -91,7 +91,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
    real rr2 = rr1*rr1;
    alsq2n *= alsq2;
-    float4 bn;
+    real4 bn;
    bn.x = (bn0+alsq2n*exp2a)*rr2;
    alsq2n *= alsq2;

--- a/plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
+++ b/plugins/amoeba/platforms/cuda2/tests/TestCudaAmoebaMultipoleForce.cpp
@@ -47,7 +47,7 @@
 #define ASSERT_EQUAL_TOL_MOD(expected, found, tol, testname) {double _scale_ = std::abs(expected) > 1.0 ? std::abs(expected) : 1.0; if (!(std::abs((expected)-(found))/_scale_ <= (tol))) {std::stringstream details; details << testname << " Expected "<<(expected)<<", found "<<(found); throwException(__FILE__, __LINE__, details.str());}};
-#define ASSERT_EQUAL_VEC_MOD(expected, found, tol,testname) {ASSERT_EQUAL_TOL_MOD((expected)[0], (found)[0], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[1], (found)[1], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[2], (found)[2], (tol),(testname));};
+#define ASSERT_EQUAL_VEC_MOD(expected, found, tol, testname) {double _norm_ = std::sqrt(expected.dot(expected)); double _scale_ = _norm_ > 1.0 ? _norm_ : 1.0; if ((std::abs((expected[0])-(found[0]))/_scale_ > (tol)) || (std::abs((expected[1])-(found[1]))/_scale_ > (tol)) || (std::abs((expected[2])-(found[2]))/_scale_ > (tol))) {std::stringstream details; details << testname << " Expected "<<(expected)<<", found "<<(found); throwException(__FILE__, __LINE__, details.str());}};
 using namespace OpenMM;
@@ -683,7 +683,7 @@ static void testMultipoleWaterPMEDirectPolarization( FILE* log ) {
    expectedForces[10]        = Vec3(   9.2036949e-01,  -1.4717629e+00,  -3.3362339e+00 );
    expectedForces[11]        = Vec3(   1.2523841e+00,  -1.9794292e+00,  -3.4670129e+00 );
-    double tolerance          = 3.0e-03;
+    double tolerance          = 1.0e-03;
    compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
 }
@@ -718,7 +718,7 @@ static void testMultipoleWaterPMEMutualPolarization( FILE* log ) {
    expectedForces[10]        = Vec3(   9.1775539e-01,  -1.4651882e+00,  -3.3322516e+00 );
    expectedForces[11]        = Vec3(   1.2467701e+00,  -1.9832979e+00,  -3.4684052e+00 );
-    double tolerance          = 3.0e-03;
+    double tolerance          = 1.0e-03;
    compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
 }
@@ -2677,7 +2677,7 @@ static void testPMEMutualPolarizationLargeWater( FILE* log ) {
    expectedForces[646]       = Vec3(  -6.4620310e+02,   2.5885783e+02,  -2.0567224e+02 );
    expectedForces[647]       = Vec3(  -4.7388806e+02,  -5.5561844e+02,  -8.5019295e+02 );
-    double tolerance          = 1.0e-03;
+    double tolerance          = 1.0e-04;
    compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
 }