Converted AMOEBA to common platform (#3120)

* Began converting AMOEBA to common platform
* Beginning of OpenCL platform for AMOEBA
* Converted AmoebaVdwForce to common platform
* Cleaned up reference AMOEBA tests
* Began converting AmoebaMultipoleForce to common platform
* Continue converting AmoebaMultipoleForce to common platform
* Bug fixes
* Bug fix
* Continue converting AmoebaMultipoleForce to common platform
* Converting AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce to common platform
* Converting AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce to common platform
* Creating OpenCL version of AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce
* Creating OpenCL version of AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce
* Creating OpenCL version of AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce
* Converted arrays from real3 to real
* Bug fix to OpenCL AmoebaGeneralizedKirkwoodForce
* Fixes for AMD GPUs
* Began converting HippoNonbondedForce to common platform
* Continuing to convert HippoNonbondedForce to common platform
* Continuing to convert HippoNonbondedForce to common platform
* Working on unifying PME kernels
* Fixed error on devices without 64 bit atomics
* Unified PME kernels
* Converted HippoNonbondedForce to common platform
* Creating OpenCL implementation of HippoNonbondedForce
* Continuing OpenCL implementation of HippoNonbondedForce
* Mostly finished OpenCL implementation of HippoNonbondedForce
* Eliminated three component vector types in host code
* Fix errors on CPU OpenCL
* Skip double precision tests for AMOEBA on OpenCL
* Bug fixes
* Bug fixes
* Fixed compilation error

Converted AMOEBA to common platform (#3120)
* Began converting AMOEBA to common platform
* Beginning of OpenCL platform for AMOEBA
* Converted AmoebaVdwForce to common platform
* Cleaned up reference AMOEBA tests
* Began converting AmoebaMultipoleForce to common platform
* Continue converting AmoebaMultipoleForce to common platform
* Bug fixes
* Bug fix
* Continue converting AmoebaMultipoleForce to common platform
* Converting AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce to common platform
* Converting AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce to common platform
* Creating OpenCL version of AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce
* Creating OpenCL version of AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce
* Creating OpenCL version of AmoebaMultipoleForce and AmoebaGeneralizedKirkwoodForce
* Converted arrays from real3 to real
* Bug fix to OpenCL AmoebaGeneralizedKirkwoodForce
* Fixes for AMD GPUs
* Began converting HippoNonbondedForce to common platform
* Continuing to convert HippoNonbondedForce to common platform
* Continuing to convert HippoNonbondedForce to common platform
* Working on unifying PME kernels
* Fixed error on devices without 64 bit atomics
* Unified PME kernels
* Converted HippoNonbondedForce to common platform
* Creating OpenCL implementation of HippoNonbondedForce
* Continuing OpenCL implementation of HippoNonbondedForce
* Mostly finished OpenCL implementation of HippoNonbondedForce
* Eliminated three component vector types in host code
* Fix errors on CPU OpenCL
* Skip double precision tests for AMOEBA on OpenCL
* Bug fixes
* Bug fixes
* Fixed compilation error
8e8923a7 · Peter Eastman · GitHub · 393a4dbd · 8e8923a7 · 8e8923a7
Unverified commit 8e8923a7, authored May 22, 2021 by Peter Eastman; committed by GitHub on May 22, 2021.
20 changed files
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoComputeField.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoFixedField.cu
@@ -32,7 +32,7 @@ real bn3 = (5*bn2+alsq2n*exp2a)*invR2;
 // Calculate the field at particle 1 due to multipoles at particle 2

 real fdamp3, fdamp5, fdamp7;
-computeDirectFieldDampingFactors(alpha2, r, fdamp3, fdamp5, fdamp7);
+computeDirectFieldDampingFactors(alpha2, r, &fdamp3, &fdamp5, &fdamp7);
 #ifndef COMPUTING_EXCEPTIONS
 real scale = 1;
 #endif
@@ -58,7 +58,7 @@ tempField1 = -delta*factor2 - dipole2*rr3j + qDotDelta2*2*rr5j;

 // Calculate the field at particle 2 due to multipoles at particle 1

-computeDirectFieldDampingFactors(alpha1, r, fdamp3, fdamp5, fdamp7);
+computeDirectFieldDampingFactors(alpha1, r, &fdamp3, &fdamp5, &fdamp7);
 #ifdef USE_EWALD
 real rr3i = bn1 - (1-scale*fdamp3)*invR3;
 real rr5i = bn2 - (1-scale*fdamp5)*3*invR5;

--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoInteraction.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoInteraction.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoInteractionHeader.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoInteractionHeader.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoMultipoles.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoMultipoles.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoMutualField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoMutualField.cu
 real fdamp3, fdamp5;
-computeMutualFieldDampingFactors(alpha1, alpha2, r, fdamp3, fdamp5);
+computeMutualFieldDampingFactors(alpha1, alpha2, r, &fdamp3, &fdamp5);
 #ifdef COMPUTING_EXCEPTIONS
 fdamp3 *= scale;
 fdamp5 *= scale;

--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoNonbonded.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoNonbonded.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/hippoNonbondedExceptions.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/hippoNonbondedExceptions.cu
@@ -3,15 +3,15 @@
 /**
 * Compute exceptions for HIPPO.
 */
-extern "C" __global__ void computeNonbondedExceptions(
-        unsigned long long* __restrict__ forceBuffers, mixed* __restrict__ energyBuffer, unsigned long long* __restrict__ torqueBuffers,
-        const real4* __restrict__ posq, const real3* __restrict__ extDipole, const int2* __restrict__ exceptionAtoms, const real* __restrict__ mmScale,
-        const real* __restrict__ dmScale, const real* __restrict__ ddScale, const real* __restrict__ dispScale, const real* __restrict__ repScale, const real* __restrict__ ctScale,
-        const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ alpha, const real* __restrict__ epsilon,
-        const real* __restrict__ damping, const real* __restrict__ c6, const real* __restrict__ pauliK, const real* __restrict__ pauliQ,
-        const real* __restrict__ pauliAlpha, const real3* __restrict__ dipole, const real3* __restrict__ inducedDipole, const real* __restrict__ qXX,
-        const real* __restrict__ qXY, const real* __restrict__ qXZ, const real* __restrict__ qYY, const real* __restrict__ qYZ,
-        const real3* __restrict__ extrapolatedDipole
+KERNEL void computeNonbondedExceptions(
+        GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL mixed* RESTRICT energyBuffer, GLOBAL mm_ulong* RESTRICT torqueBuffers,
+        GLOBAL const real4* RESTRICT posq, GLOBAL const int2* RESTRICT exceptionAtoms, GLOBAL const real* RESTRICT mmScale,
+        GLOBAL const real* RESTRICT dmScale, GLOBAL const real* RESTRICT ddScale, GLOBAL const real* RESTRICT dispScale, GLOBAL const real* RESTRICT repScale, GLOBAL const real* RESTRICT ctScale,
+        GLOBAL const real* RESTRICT coreCharge, GLOBAL const real* RESTRICT valenceCharge, GLOBAL const real* RESTRICT alpha, GLOBAL const real* RESTRICT epsilon,
+        GLOBAL const real* RESTRICT damping, GLOBAL const real* RESTRICT c6, GLOBAL const real* RESTRICT pauliK, GLOBAL const real* RESTRICT pauliQ,
+        GLOBAL const real* RESTRICT pauliAlpha, GLOBAL const real* RESTRICT dipole, GLOBAL const real* RESTRICT inducedDipole, GLOBAL const real* RESTRICT qXX,
+        GLOBAL const real* RESTRICT qXY, GLOBAL const real* RESTRICT qXZ, GLOBAL const real* RESTRICT qYY, GLOBAL const real* RESTRICT qYZ,
+        GLOBAL const real* RESTRICT extrapolatedDipole
 #ifdef USE_CUTOFF
        , real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVec
 #endif
@@ -19,7 +19,7 @@ extern "C" __global__ void computeNonbondedExceptions(
    mixed energy = 0;
    const bool isExcluded = false;
    const real interactionScale = 1.0f;
-    for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_EXCEPTIONS; index += blockDim.x*gridDim.x) {
+    for (int index = GLOBAL_ID; index < NUM_EXCEPTIONS; index += GLOBAL_SIZE) {
        int2 atoms = exceptionAtoms[index];
        int atom1 = atoms.x;
        int atom2 = atoms.y;
@@ -42,8 +42,8 @@ extern "C" __global__ void computeNonbondedExceptions(
            real pauliK1 = pauliK[atom1];
            real pauliQ1 = pauliQ[atom1];
            real pauliAlpha1 = pauliAlpha[atom1];
-            real3 dipole1 = dipole[atom1];
-            real3 inducedDipole1 = inducedDipole[atom1];
+            real3 dipole1 = make_real3(dipole[3*atom1], dipole[3*atom1+1], dipole[3*atom1+2]);
+            real3 inducedDipole1 = make_real3(inducedDipole[3*atom1], inducedDipole[3*atom1+1], inducedDipole[3*atom1+2]);
            real qXX1 = qXX[atom1];
            real qXY1 = qXY[atom1];
            real qXZ1 = qXZ[atom1];
@@ -58,8 +58,8 @@ extern "C" __global__ void computeNonbondedExceptions(
            real pauliK2 = pauliK[atom2];
            real pauliQ2 = pauliQ[atom2];
            real pauliAlpha2 = pauliAlpha[atom2];
-            real3 dipole2 = dipole[atom2];
-            real3 inducedDipole2 = inducedDipole[atom2];
+            real3 dipole2 = make_real3(dipole[3*atom2], dipole[3*atom2+1], dipole[3*atom2+2]);
+            real3 inducedDipole2 = make_real3(inducedDipole[3*atom2], inducedDipole[3*atom2+1], inducedDipole[3*atom2+2]);
            real qXX2 = qXX[atom2];
            real qXY2 = qXY[atom2];
            real qXZ2 = qXZ[atom2];
@@ -79,21 +79,21 @@ extern "C" __global__ void computeNonbondedExceptions(
            real tempEnergy = 0.0f;
            COMPUTE_INTERACTION
            energy += tempEnergy;
-            atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (tempForce.x*0x100000000)));
-            atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempForce.y*0x100000000)));
-            atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempForce.z*0x100000000)));
-            atomicAdd(&forceBuffers[atom2], static_cast<unsigned long long>((long long) (-tempForce.x*0x100000000)));
-            atomicAdd(&forceBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-tempForce.y*0x100000000)));
-            atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (-tempForce.z*0x100000000)));
-            atomicAdd(&torqueBuffers[atom1], static_cast<unsigned long long>((long long) (tempTorque1.x*0x100000000)));
-            atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque1.y*0x100000000)));
-            atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque1.z*0x100000000)));
-            atomicAdd(&torqueBuffers[atom2], static_cast<unsigned long long>((long long) (tempTorque2.x*0x100000000)));
-            atomicAdd(&torqueBuffers[atom2+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque2.y*0x100000000)));
-            atomicAdd(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (tempTorque2.z*0x100000000)));
+            ATOMIC_ADD(&forceBuffers[atom1], (mm_ulong) ((mm_long) (tempForce.x*0x100000000)));
+            ATOMIC_ADD(&forceBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempForce.y*0x100000000)));
+            ATOMIC_ADD(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempForce.z*0x100000000)));
+            ATOMIC_ADD(&forceBuffers[atom2], (mm_ulong) ((mm_long) (-tempForce.x*0x100000000)));
+            ATOMIC_ADD(&forceBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (-tempForce.y*0x100000000)));
+            ATOMIC_ADD(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (-tempForce.z*0x100000000)));
+            ATOMIC_ADD(&torqueBuffers[atom1], (mm_ulong) ((mm_long) (tempTorque1.x*0x100000000)));
+            ATOMIC_ADD(&torqueBuffers[atom1+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque1.y*0x100000000)));
+            ATOMIC_ADD(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque1.z*0x100000000)));
+            ATOMIC_ADD(&torqueBuffers[atom2], (mm_ulong) ((mm_long) (tempTorque2.x*0x100000000)));
+            ATOMIC_ADD(&torqueBuffers[atom2+PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque2.y*0x100000000)));
+            ATOMIC_ADD(&torqueBuffers[atom2+2*PADDED_NUM_ATOMS], (mm_ulong) ((mm_long) (tempTorque2.z*0x100000000)));
 #ifdef USE_CUTOFF
        }
 #endif
    }
-    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
+    energyBuffer[GLOBAL_ID] += energy;
 }
--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
--- a/plugins/amoeba/platforms/common/src/kernels/multipoleInducedField.cc
+++ b/plugins/amoeba/platforms/common/src/kernels/multipoleInducedField.cc
--- a/plugins/amoeba/platforms/common/src/kernels/multipolePme.cc
+++ b/plugins/amoeba/platforms/common/src/kernels/multipolePme.cc
--- a/plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
--- a/plugins/amoeba/platforms/cuda/src/kernels/sphericalMultipoles.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/sphericalMultipoles.cu
--- a/plugins/amoeba/platforms/cuda/CMakeLists.txt
+++ b/plugins/amoeba/platforms/cuda/CMakeLists.txt
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
--- a/plugins/amoeba/platforms/cuda/src/kernels/gkEDiffPairForce.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/gkEDiffPairForce.cu