Converted more code to common platform (#3073)

* Converted more code to common platform * Converted more code to common platform

Converted more code to common platform (#3073)
* Converted more code to common platform * Converted more code to common platform
98d81730 · Peter Eastman · GitHub · 72c70cfe · 98d81730 · 98d81730
Unverified Commit 98d81730 authored Mar 19, 2021 by Peter Eastman Committed by GitHub Mar 19, 2021
14 changed files
--- a/platforms/opencl/src/OpenCLContext.cpp
+++ b/platforms/opencl/src/OpenCLContext.cpp
@@ -413,6 +413,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
    compilationDefines["ACOS"] = "acos";
    compilationDefines["ASIN"] = "asin";
    compilationDefines["ATAN"] = "atan";
+    compilationDefines["ERF"] = "erf";
+    compilationDefines["ERFC"] = "erfc";

    // Set defines for applying periodic boundary conditions.


--- a/platforms/opencl/src/OpenCLKernelFactory.cpp
+++ b/platforms/opencl/src/OpenCLKernelFactory.cpp
@@ -72,9 +72,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
    if (name == UpdateStateDataKernel::Name())
        return new OpenCLUpdateStateDataKernel(name, platform, cl);
    if (name == ApplyConstraintsKernel::Name())
-        return new OpenCLApplyConstraintsKernel(name, platform, cl);
+        return new CommonApplyConstraintsKernel(name, platform, cl);
    if (name == VirtualSitesKernel::Name())
-        return new OpenCLVirtualSitesKernel(name, platform, cl);
+        return new CommonVirtualSitesKernel(name, platform, cl);
    if (name == CalcHarmonicBondForceKernel::Name())
        return new CommonCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
    if (name == CalcCustomBondForceKernel::Name())
@@ -134,7 +134,7 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
    if (name == IntegrateNoseHooverStepKernel::Name())
        return new CommonIntegrateNoseHooverStepKernel(name, platform, cl);
    if (name == ApplyMonteCarloBarostatKernel::Name())
-        return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
+        return new CommonApplyMonteCarloBarostatKernel(name, platform, cl);
    if (name == RemoveCMMotionKernel::Name())
        return new CommonRemoveCMMotionKernel(name, platform, cl);
    throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());

--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
--- a/platforms/opencl/src/kernels/angleForce.cl
+++ b/platforms/opencl/src/kernels/angleForce.cl
-real4 v0 = pos2-pos1;
-real4 v1 = pos2-pos3;
-#if APPLY_PERIODIC
-APPLY_PERIODIC_TO_DELTA(v0)
-APPLY_PERIODIC_TO_DELTA(v1)
-#endif
-real4 cp = cross(v0, v1);
-real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
-rp = max(SQRT(rp), (real) 1.0e-06f);
-real r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
-real r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
-real dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
-real cosine = clamp(dot*RSQRT(r21*r23), (real) -1, (real) 1);
-real theta = acos(cosine);
-COMPUTE_FORCE
-real4 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
-real4 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
-real4 force2 = -force1-force3;
--- a/platforms/opencl/src/kernels/bondForce.cl
+++ b/platforms/opencl/src/kernels/bondForce.cl
-real4 delta = pos2-pos1;
-#if APPLY_PERIODIC
-APPLY_PERIODIC_TO_DELTA(delta)
-#endif
-real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
-COMPUTE_FORCE
-dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
-delta.xyz *= dEdR;
-real4 force1 = delta;
-real4 force2 = -delta;
\ No newline at end of file
--- a/platforms/opencl/src/kernels/constraints.cl
+++ b/platforms/opencl/src/kernels/constraints.cl
-__kernel void applyPositionDeltas(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta) {
-    for (unsigned int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
-#ifdef USE_MIXED_PRECISION
-        real4 pos1 = posq[index];
-        real4 pos2 = posqCorrection[index];
-        mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
-#else
-        mixed4 pos = posq[index];
-#endif
-        pos.xyz += posDelta[index].xyz;
-#ifdef USE_MIXED_PRECISION
-        posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
-        posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
-#else
-        posq[index] = pos;
-#endif
-    }
-}
--- a/platforms/opencl/src/kernels/coulombLennardJones.cl
+++ b/platforms/opencl/src/kernels/coulombLennardJones.cl
-{
-#ifdef USE_DOUBLE_PRECISION
-    unsigned long includeInteraction;
-#else
-    unsigned int includeInteraction;
-#endif
-#if USE_EWALD
-    includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
-    const real alphaR = EWALD_ALPHA*r;
-    const real expAlphaRSqr = EXP(-alphaR*alphaR);
-#if HAS_COULOMB
-    const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2*invR;
-#else
-    const real prefactor = 0.0f;
-#endif
-
-#ifdef USE_DOUBLE_PRECISION
-    const real erfcAlphaR = erfc(alphaR);
-#else
-    // This approximation for erfc is from Abramowitz and Stegun (1964) p. 299.  They cite the following as
-    // the original source: C. Hastings, Jr., Approximations for Digital Computers (1955).  It has a maximum
-    // error of 1.5e-7.
-
-    const real t = RECIP(1.0f+0.3275911f*alphaR);
-    const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
-#endif
-    real tempForce = 0;
-#if HAS_LENNARD_JONES
-    real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
-    real sig2 = invR*sig;
-    sig2 *= sig2;
-    real sig6 = sig2*sig2*sig2;
-    real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
-    real epssig6 = sig6*eps;
-    tempForce = epssig6*(12.0f*sig6 - 6.0f);
-    real ljEnergy = epssig6*(sig6 - 1.0f);
-    #if USE_LJ_SWITCH
-    if (r > LJ_SWITCH_CUTOFF) {
-        real x = r-LJ_SWITCH_CUTOFF;
-        real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
-        real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
-        tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
-        ljEnergy *= switchValue;
-    }
-    #endif
-#if DO_LJPME
-    // The multiplicative term to correct for the multiplicative terms that are always
-    // present in reciprocal space.
-    const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
-    const real dar2 = dispersionAlphaR*dispersionAlphaR;
-    const real dar4 = dar2*dar2;
-    const real dar6 = dar4*dar2;
-    const real invR2 = invR*invR;
-    const real expDar2 = EXP(-dar2);
-    const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
-    const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
-    const real coef = invR2*invR2*invR2*c6;
-    const real eprefac = 1.0f + dar2 + 0.5f*dar4;
-    const real dprefac = eprefac + dar6/6.0f;
-    // The multiplicative grid term
-    ljEnergy += coef*(1.0f - expDar2*eprefac);
-    tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
-    // The potential shift accounts for the step at the cutoff introduced by the
-    // transition from additive to multiplicative combintion rules and is only
-    // needed for the real (not excluded) terms.  By addin these terms to ljEnergy
-    // instead of tempEnergy here, the includeInteraction mask is correctly applied.
-    sig2 = sig*sig;
-    sig6 = sig2*sig2*sig2*INVCUT6;
-    epssig6 = eps*sig6;
-    // The additive part of the potential shift
-    ljEnergy += epssig6*(1.0f - sig6);
-    // The multiplicative part of the potential shift
-    ljEnergy += MULTSHIFT6*c6;
-#endif
-    tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
-    tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
-#else
-    tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
-    tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
-#endif
-    dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
-#else
-#ifdef USE_CUTOFF
-    includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
-#else
-    includeInteraction = (!isExcluded);
-#endif
-    real tempForce = 0;
-  #if HAS_LENNARD_JONES
-    real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
-    real sig2 = invR*sig;
-    sig2 *= sig2;
-    real sig6 = sig2*sig2*sig2;
-    real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
-    tempForce = epssig6*(12.0f*sig6 - 6.0f);
-    real ljEnergy = epssig6*(sig6-1);
-    #if USE_LJ_SWITCH
-    if (r > LJ_SWITCH_CUTOFF) {
-        real x = r-LJ_SWITCH_CUTOFF;
-        real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
-        real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
-        tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
-        ljEnergy *= switchValue;
-    }
-    #endif
-    ljEnergy = select((real) 0, ljEnergy, includeInteraction);
-    tempEnergy += ljEnergy;
-  #endif
-#if HAS_COULOMB
-  #ifdef USE_CUTOFF
-    const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2;
-    tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
-    tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
-  #else
-    const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2*invR;
-    tempForce += prefactor;
-    tempEnergy += select((real) 0, prefactor, includeInteraction);
-  #endif
-#endif
-    dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
-#endif
-}
--- a/platforms/opencl/src/kernels/customCVForce.cl
+++ b/platforms/opencl/src/kernels/customCVForce.cl
-/**
- * Copy the positions and velocities to the inner context.
- */
-__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global int* restrict atomOrder,
-        __global real4* innerPosq, __global real4* innerPosqCorrection, __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder,
-        int numAtoms) {
-    for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
-        int index = innerInvAtomOrder[atomOrder[i]];
-        innerPosq[index] = posq[i];
-        innerVelm[index] = velm[i];
-#ifdef USE_MIXED_PRECISION
-        innerPosqCorrection[index] = posqCorrection[i];
-#endif
-    }
-}
-
-/**
- * Copy the forces back to the main context.
- */
-__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder, __global real4* innerForces,
-        __global int* restrict innerAtomOrder, int numAtoms) {
-    for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
-        int index = invAtomOrder[innerAtomOrder[i]];
-        forces[index] = innerForces[i];
-    }
-}
-
-/**
- * Add all the forces from the CVs.
- */
-__kernel void addForces(__global real4* forces, int numAtoms
-    PARAMETER_ARGUMENTS) {
-    for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
-        real4 f = forces[i];
-        ADD_FORCES
-        forces[i] = f;
-    }
-}
\ No newline at end of file
--- a/platforms/opencl/src/kernels/ewald.cl
+++ b/platforms/opencl/src/kernels/ewald.cl
-real2 multofReal2(real2 a, real2 b) {
-    return (real2) (a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
-}
-
-/**
- * Precompute the cosine and sine sums which appear in each force term.
- */
-
-__kernel void calculateEwaldCosSinSums(__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
-    const unsigned int ksizex = 2*KMAX_X-1;
-    const unsigned int ksizey = 2*KMAX_Y-1;
-    const unsigned int ksizez = 2*KMAX_Z-1;
-    const unsigned int totalK = ksizex*ksizey*ksizez;
-    unsigned int index = get_global_id(0);
-    mixed energy = 0;
-    while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
-        index += get_global_size(0);
-    while (index < totalK) {
-        // Find the wave vector (kx, ky, kz) this index corresponds to.
-
-        int rx = index/(ksizey*ksizez);
-        int remainder = index - rx*ksizey*ksizez;
-        int ry = remainder/ksizez;
-        int rz = remainder - ry*ksizez - KMAX_Z + 1;
-        ry += -KMAX_Y + 1;
-        real kx = rx*reciprocalPeriodicBoxSize.x;
-        real ky = ry*reciprocalPeriodicBoxSize.y;
-        real kz = rz*reciprocalPeriodicBoxSize.z;
-
-        // Compute the sum for this wave vector.
-
-        real2 sum = 0.0f;
-        for (int atom = 0; atom < NUM_ATOMS; atom++) {
-            real4 apos = posq[atom];
-            real phase = apos.x*kx;
-            real2 structureFactor = (real2) (cos(phase), sin(phase));
-            phase = apos.y*ky;
-            structureFactor = multofReal2(structureFactor, (real2) (cos(phase), sin(phase)));
-            phase = apos.z*kz;
-            structureFactor = multofReal2(structureFactor, (real2) (cos(phase), sin(phase)));
-            sum += apos.w*structureFactor;
-        }
-        cosSinSum[index] = sum;
-
-        // Compute the contribution to the energy.
-
-        real k2 = kx*kx + ky*ky + kz*kz;
-        real ak = EXP(k2*EXP_COEFFICIENT) / k2;
-        energy += reciprocalCoefficient*ak*(sum.x*sum.x + sum.y*sum.y);
-        index += get_global_size(0);
-    }
-    energyBuffer[get_global_id(0)] += energy;
-}
-
-/**
- * Compute the reciprocal space part of the Ewald force, using the precomputed sums from the
- * previous routine.
- */
-
-__kernel void calculateEwaldForces(__global real4* restrict forceBuffers, __global const real4* restrict posq, __global const real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
-    unsigned int atom = get_global_id(0);
-    while (atom < NUM_ATOMS) {
-        real4 force = forceBuffers[atom];
-        real4 apos = posq[atom];
-
-        // Loop over all wave vectors.
-
-        int lowry = 0;
-        int lowrz = 1;
-        for (int rx = 0; rx < KMAX_X; rx++) {
-            real kx = rx*reciprocalPeriodicBoxSize.x;
-            for (int ry = lowry; ry < KMAX_Y; ry++) {
-                real ky = ry*reciprocalPeriodicBoxSize.y;
-                real phase = apos.x*kx;
-                real2 tab_xy = (real2) (cos(phase), sin(phase));
-                phase = apos.y*ky;
-                tab_xy = multofReal2(tab_xy, (real2) (cos(phase), sin(phase)));
-                for (int rz = lowrz; rz < KMAX_Z; rz++) {
-                    real kz = rz*reciprocalPeriodicBoxSize.z;
-
-                    // Compute the force contribution of this wave vector.
-
-                    int index = rx*(KMAX_Y*2-1)*(KMAX_Z*2-1) + (ry+KMAX_Y-1)*(KMAX_Z*2-1) + (rz+KMAX_Z-1);
-                    real k2 = kx*kx + ky*ky + kz*kz;
-                    real ak = EXP(k2*EXP_COEFFICIENT)/k2;
-                    phase = apos.z*kz;
-                    real2 structureFactor = multofReal2(tab_xy, (real2) (cos(phase), sin(phase)));
-                    real2 sum = cosSinSum[index];
-                    real dEdR = 2*reciprocalCoefficient*ak*apos.w*(sum.x*structureFactor.y - sum.y*structureFactor.x);
-                    force.x += dEdR*kx;
-                    force.y += dEdR*ky;
-                    force.z += dEdR*kz;
-                    lowrz = 1 - KMAX_Z;
-                }
-                lowry = 1 - KMAX_Y;
-            }
-        }
-
-        // Record the force on the atom.
-
-        forceBuffers[atom] = force;
-        atom += get_global_size(0);
-    }
-}
--- a/platforms/opencl/src/kernels/monteCarloBarostat.cl
+++ b/platforms/opencl/src/kernels/monteCarloBarostat.cl
-/**
- * Scale the particle positions with each axis independent.
- */
-
-__kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize,
-        real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __global real4* restrict posq,
-        __global const int* restrict moleculeAtoms, __global const int* restrict moleculeStartIndex) {
-    for (int index = get_global_id(0); index < numMolecules; index += get_global_size(0)) {
-        int first = moleculeStartIndex[index];
-        int last = moleculeStartIndex[index+1];
-        int numAtoms = last-first;
-
-        // Find the center of each molecule.
-
-        real3 center = (real3) 0;
-        for (int atom = first; atom < last; atom++)
-            center += posq[moleculeAtoms[atom]].xyz;
-        center /= (real) numAtoms;
-
-        // Move it into the first periodic box.
-
-        real3 oldCenter = center;
-        APPLY_PERIODIC_TO_POS(center)
-        real3 delta = oldCenter-center;;
-        real3 scaleXYZ = (real3) (scaleX, scaleY, scaleZ);
-
-        // Now scale the position of the molecule center.
-
-        delta = center*(scaleXYZ-1)-delta;
-        for (int atom = first; atom < last; atom++) {
-            real4 pos = posq[moleculeAtoms[atom]];
-            pos.xyz += delta.xyz;
-            posq[moleculeAtoms[atom]] = pos;
-        }
-    }
-}
--- a/platforms/opencl/src/kernels/nonbondedExceptions.cl
+++ b/platforms/opencl/src/kernels/nonbondedExceptions.cl
-float4 exceptionParams = PARAMS[index];
-real4 delta = pos2-pos1;
-#if APPLY_PERIODIC
-APPLY_PERIODIC_TO_DELTA(delta)
-#endif
-real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
-real invR = RSQRT(r2);
-real sig2 = invR*exceptionParams.y;
-sig2 *= sig2;
-real sig6 = sig2*sig2*sig2;
-real dEdR = exceptionParams.z*(12.0f*sig6-6.0f)*sig6;
-real tempEnergy = exceptionParams.z*(sig6-1.0f)*sig6;
-dEdR += exceptionParams.x*invR;
-dEdR *= invR*invR;
-tempEnergy += exceptionParams.x*invR;
-energy += tempEnergy;
-delta.xyz *= dEdR;
-real4 force1 = -delta;
-real4 force2 = delta;
--- a/platforms/opencl/src/kernels/nonbondedParameters.cl
+++ b/platforms/opencl/src/kernels/nonbondedParameters.cl
-/**
- * Compute the nonbonded parameters for particles and exceptions.
- */
-__kernel void computeParameters(__global mixed* restrict energyBuffer, int includeSelfEnergy, __global real* restrict globalParams,
-        int numAtoms, __global const float4* restrict baseParticleParams, __global real4* restrict posq, __global real* restrict charge,
-        __global float2* restrict sigmaEpsilon, __global float4* restrict particleParamOffsets, __global int* restrict particleOffsetIndices
-#ifdef HAS_EXCEPTIONS
-        , int numExceptions, __global const float4* restrict baseExceptionParams, __global float4* restrict exceptionParams,
-        __global float4* restrict exceptionParamOffsets, __global int* restrict exceptionOffsetIndices
-#endif
-        ) {
-    mixed energy = 0;
-
-    // Compute particle parameters.
-    
-    for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
-        float4 params = baseParticleParams[i];
-#ifdef HAS_OFFSETS
-        int start = particleOffsetIndices[i], end = particleOffsetIndices[i+1];
-        for (int j = start; j < end; j++) {
-            float4 offset = particleParamOffsets[j];
-            real value = globalParams[(int) offset.w];
-            params.x += value*offset.x;
-            params.y += value*offset.y;
-            params.z += value*offset.z;
-        }
-#endif
-#ifdef USE_POSQ_CHARGES
-        posq[i].w = params.x;
-#else
-        charge[i] = params.x;
-#endif
-        sigmaEpsilon[i] = (float2) (0.5f*params.y, 2*SQRT(params.z));
-#ifdef HAS_OFFSETS
-    #ifdef INCLUDE_EWALD
-        energy -= EWALD_SELF_ENERGY_SCALE*params.x*params.x;
-    #endif
-    #ifdef INCLUDE_LJPME
-        real sig3 = params.y*params.y*params.y;
-        energy += LJPME_SELF_ENERGY_SCALE*sig3*sig3*params.z;
-    #endif
-#endif
-    }
-
-    // Compute exception parameters.
-    
-#ifdef HAS_EXCEPTIONS
-    for (int i = get_global_id(0); i < numExceptions; i += get_global_size(0)) {
-        float4 params = baseExceptionParams[i];
-#ifdef HAS_OFFSETS
-        int start = exceptionOffsetIndices[i], end = exceptionOffsetIndices[i+1];
-        for (int j = start; j < end; j++) {
-            float4 offset = exceptionParamOffsets[j];
-            real value = globalParams[(int) offset.w];
-            params.x += value*offset.x;
-            params.y += value*offset.y;
-            params.z += value*offset.z;
-        }
-#endif
-        exceptionParams[i] = (float4) ((float) (ONE_4PI_EPS0*params.x), (float) params.y, (float) (4*params.z), 0);
-    }
-#endif
-    if (includeSelfEnergy)
-        energyBuffer[get_global_id(0)] += energy;
-}
-
-/**
- * Compute parameters for subtracting the reciprocal part of excluded interactions.
- */
-__kernel void computeExclusionParameters(__global real4* restrict posq, __global real* restrict charge, __global float2* restrict sigmaEpsilon,
-        int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams) {
-    for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
-        int2 atoms = exclusionAtoms[i];
-#ifdef USE_POSQ_CHARGES
-        real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
-#else
-        real chargeProd = charge[atoms.x]*charge[atoms.y];
-#endif
-#ifdef INCLUDE_LJPME
-        float2 sigEps1 = sigmaEpsilon[atoms.x];
-        float2 sigEps2 = sigmaEpsilon[atoms.y];
-        float sigma = sigEps1.x*sigEps2.x;
-        float epsilon = sigEps1.y*sigEps2.y;
-#else
-        float sigma = 0;
-        float epsilon = 0;
-#endif
-        exclusionParams[i] = (float4) ((float) (ONE_4PI_EPS0*chargeProd), sigma, epsilon, 0);
-    }
-}
--- a/platforms/opencl/src/kernels/pmeExclusions.cl
+++ b/platforms/opencl/src/kernels/pmeExclusions.cl
-const float4 exclusionParams = PARAMS[index];
-real3 delta = (real3) (pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
-#if USE_PERIODIC
-    APPLY_PERIODIC_TO_DELTA(delta)
-#endif
-const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
-const real r = SQRT(r2);
-const real invR = RECIP(r);
-const real alphaR = EWALD_ALPHA*r;
-const real expAlphaRSqr = EXP(-alphaR*alphaR);
-real tempForce = 0.0f;
-if (alphaR > 1e-6f) {
-    const real erfAlphaR = erf(alphaR);
-    const real prefactor = exclusionParams.x*invR;
-    tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
-    energy -= prefactor*erfAlphaR;
-}
-else {
-    energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
-}
-#if DO_LJPME
-const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
-const real dar2 = dispersionAlphaR*dispersionAlphaR;
-const real dar4 = dar2*dar2;
-const real dar6 = dar4*dar2;
-const real invR2 = invR*invR;
-const real expDar2 = EXP(-dar2);
-const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
-const real coef = invR2*invR2*invR2*c6;
-const real eprefac = 1.0f + dar2 + 0.5f*dar4;
-const real dprefac = eprefac + dar6/6.0f;
-energy += coef*(1.0f - expDar2*eprefac);
-tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
-#endif
-if (r > 0)
-    delta *= tempForce*invR*invR;
-real3 force1 = -delta;
-real3 force2 = delta;
--- a/platforms/opencl/src/kernels/torsionForce.cl
+++ b/platforms/opencl/src/kernels/torsionForce.cl
-const real PI = 3.14159265358979323846f;
-real4 v0 = (real4) (pos1.xyz-pos2.xyz, 0.0f);
-real4 v1 = (real4) (pos3.xyz-pos2.xyz, 0.0f);
-real4 v2 = (real4) (pos3.xyz-pos4.xyz, 0.0f);
-#if APPLY_PERIODIC
-APPLY_PERIODIC_TO_DELTA(v0)
-APPLY_PERIODIC_TO_DELTA(v1)
-APPLY_PERIODIC_TO_DELTA(v2)
-#endif
-real4 cp0 = cross(v0, v1);
-real4 cp1 = cross(v1, v2);
-real cosangle = dot(normalize(cp0), normalize(cp1));
-real theta;
-if (cosangle > 0.99f || cosangle < -0.99f) {
-    // We're close to the singularity in acos(), so take the cross product and use asin() instead.
-
-    real4 cross_prod = cross(cp0, cp1);
-    real scale = dot(cp0, cp0)*dot(cp1, cp1);
-    theta = asin(SQRT(dot(cross_prod, cross_prod)/scale));
-    if (cosangle < 0)
-        theta = PI-theta;
-}
-else
-   theta = acos(cosangle);
-theta = (dot(v0, cp1) >= 0 ? theta : -theta);
-COMPUTE_FORCE
-real normCross1 = dot(cp0, cp0);
-real normSqrBC = dot(v1, v1);
-real normBC = SQRT(normSqrBC);
-real normCross2 = dot(cp1, cp1);
-real dp = 1.0f/normSqrBC;
-real4 ff = (real4) ((-dEdAngle*normBC)/normCross1, dot(v0, v1)*dp, dot(v2, v1)*dp, (dEdAngle*normBC)/normCross2);
-real4 force1 = ff.x*cp0;
-real4 force4 = ff.w*cp1;
-real4 s = ff.y*force1 - ff.z*force4;
-real4 force2 = s-force1;
-real4 force3 = -s-force4;