Extended Free energy plugin to allow cutoffs; cleaned up code and added tests

bc85b9f0 · Mark Friedrichs · d4441c15 · bc85b9f0 · bc85b9f0 · bc85b9f0
Commit bc85b9f0 authored Oct 24, 2011 by Mark Friedrichs
20 changed files
--- a/plugins/freeEnergy/openmmapi/include/openmm/GBSAOBCSoftcoreForce.h
+++ b/plugins/freeEnergy/openmmapi/include/openmm/GBSAOBCSoftcoreForce.h
@@ -52,7 +52,29 @@ namespace OpenMM {
 */
 class OPENMM_EXPORT GBSAOBCSoftcoreForce : public Force {
 public:
+    /** 
+     * This is an enumeration of the different methods that may be used for handling long range nonbonded forces.
+     */
+    enum NonbondedSoftcoreMethod {
+        /** 
+         * No cutoff is applied to nonbonded interactions.  The full set of N^2 interactions is computed exactly.
+         * This necessarily means that periodic boundary conditions cannot be used.  This is the default.
+         */
+        NoCutoff = 0,
+        /** 
+         * Interactions beyond the cutoff distance are ignored.
+         */
+        CutoffNonPeriodic = 1,
+        /** 
+         * Periodic boundary conditions are used, so that each particle interacts only with the nearest periodic copy of
+         * each other particle.  Interactions beyond the cutoff distance are ignored.
+         */
+        CutoffPeriodic = 2,
+    };  
    /*
     * Create a GBSAOBCSoftcoreForce.
     */
@@ -134,17 +156,41 @@ public:
    double getNonPolarPrefactor() const {
        return nonPolarPrefactor;
    }
    /**
     * Set the nonPolarPrefactor; units are kJ/mol/nm^2
     */
    void setNonPolarPrefactor(double inputNonPolarPrefactor) {
        nonPolarPrefactor = inputNonPolarPrefactor;
    }
+    /** 
+     * Get the method used for handling long range nonbonded interactions.
+     */
+    NonbondedSoftcoreMethod getNonbondedMethod() const;
+    /** 
+     * Set the method used for handling long range nonbonded interactions.
+     */
+    void setNonbondedMethod(NonbondedSoftcoreMethod method);
+    /** 
+     * Get the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
+     * is NoCutoff, this value will have no effect.
+     */
+    double getCutoffDistance() const;
+    /** 
+     * Set the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
+     * is NoCutoff, this value will have no effect.
+     */
+    void setCutoffDistance(double distance);
 protected:
    ForceImpl* createImpl();
 private:
    class ParticleInfo;
-    double solventDielectric, soluteDielectric, nonPolarPrefactor;
+    NonbondedSoftcoreMethod nonbondedMethod;
+    double cutoffDistance, solventDielectric, soluteDielectric, nonPolarPrefactor;
 // The Visual Studio compiler complains about being unable to
 // export private STL class members.

--- a/plugins/freeEnergy/openmmapi/include/openmm/GBVISoftcoreForce.h
+++ b/plugins/freeEnergy/openmmapi/include/openmm/GBVISoftcoreForce.h
@@ -82,10 +82,6 @@ public:
         * No scaling method is applied.
         */
        NoScaling          = 0,
-        /**
-         * Use the method outlined in Proteins 55, 383-394 (2004), Eq. 6
-         */
-        Tanh               = 1,
        /**
         * Use quintic spline scaling function
         */
@@ -240,16 +236,6 @@ public:
     * Set the upper limit used in the quintic spline scaling method (typically 0.008)
     */
    void setQuinticUpperBornRadiusLimit(double quinticUpperBornRadiusLimit);
-    /** 
-     * Get the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
-     * is NoCutoff, this value will have no effect.
-     */
-    void getTanhParameters( double& alpha, double& beta, double& gamma) const;
-    /** 
-     * Set the cutoff distance (in nm) being used for nonbonded interactions.  If the NonbondedMethod in use
-     * is NoCutoff, this value will have no effect.
-     */
-    void setTanhParameters(double alpha, double beta, double gamma);
 protected:
    ForceImpl* createImpl();
 private:

--- a/plugins/freeEnergy/openmmapi/include/openmm/NonbondedSoftcoreForce.h
+++ b/plugins/freeEnergy/openmmapi/include/openmm/NonbondedSoftcoreForce.h
@@ -86,16 +86,6 @@ public:
         * cutoff distance are modified using the reaction field method.
         */
        CutoffPeriodic = 2,
-        /**
-         * Periodic boundary conditions are used, and Ewald summation is used to compute the interaction of each particle
-         * with all periodic copies of every other particle.
-         */
-        Ewald = 3,
-        /**
-         * Periodic boundary conditions are used, and Particle-Mesh Ewald (PME) summation is used to compute the interaction of each particle
-         * with all periodic copies of every other particle.
-         */
-        PME = 4
    };
    /**
@@ -132,20 +122,6 @@ public:
     * is NoCutoff, this value will have no effect.
     */
    void setCutoffDistance(double distance);
-    /**
-     * Get the error tolerance for Ewald summation.  This corresponds to the fractional error in the forces
-     * which is acceptable.  This value is used to select the reciprocal space cutoff and separation
-     * parameter so that the average error level will be less than the tolerance.  There is not a
-     * rigorous guarantee that all forces on all atoms will be less than the tolerance, however.
-     */
-    double getEwaldErrorTolerance() const;
-    /**
-     * Get the error tolerance for Ewald summation.  This corresponds to the fractional error in the forces
-     * which is acceptable.  This value is used to select the reciprocal space cutoff and separation
-     * parameter so that the average error level will be less than the tolerance.  There is not a
-     * rigorous guarantee that all forces on all atoms will be less than the tolerance, however.
-     */
-    void setEwaldErrorTolerance(double tol);
    /**
     * Get the dielectric constant to use for the solvent in the reaction field approximation.
     */

--- a/plugins/freeEnergy/openmmapi/src/GBSAOBCSoftcoreForce.cpp
+++ b/plugins/freeEnergy/openmmapi/src/GBSAOBCSoftcoreForce.cpp
@@ -38,7 +38,7 @@ using namespace OpenMM;
 // units of nonPolarPrefactor are now kJ/mol/nm^2
-GBSAOBCSoftcoreForce::GBSAOBCSoftcoreForce() : solventDielectric(78.3), soluteDielectric(1.0), nonPolarPrefactor( 2.25936 ){
+GBSAOBCSoftcoreForce::GBSAOBCSoftcoreForce() :  nonbondedMethod(NoCutoff), cutoffDistance(1.0), solventDielectric(78.3), soluteDielectric(1.0), nonPolarPrefactor( 2.25936 ){
 }
 int GBSAOBCSoftcoreForce::addParticle(double charge, double radius, double scalingFactor, double nonPolarScalingFactor) {
@@ -66,6 +66,22 @@ void GBSAOBCSoftcoreForce::setParticleParameters(int index, double charge, doubl
    particles[index].nonPolarScalingFactor  = nonPolarScalingFactor;
 }
+GBSAOBCSoftcoreForce::NonbondedSoftcoreMethod GBSAOBCSoftcoreForce::getNonbondedMethod() const {
+    return nonbondedMethod;
+}
+void GBSAOBCSoftcoreForce::setNonbondedMethod(NonbondedSoftcoreMethod method) {
+    nonbondedMethod = method;
+}
+double GBSAOBCSoftcoreForce::getCutoffDistance() const {
+    return cutoffDistance;
+}
+void GBSAOBCSoftcoreForce::setCutoffDistance(double distance) {
+    cutoffDistance = distance;
+}
 ForceImpl* GBSAOBCSoftcoreForce::createImpl() {
    return new GBSAOBCSoftcoreForceImpl(*this);
 }
--- a/plugins/freeEnergy/openmmapi/src/GBVISoftcoreForce.cpp
+++ b/plugins/freeEnergy/openmmapi/src/GBVISoftcoreForce.cpp
@@ -38,7 +38,7 @@
 using namespace OpenMM;
 GBVISoftcoreForce::GBVISoftcoreForce() :  nonbondedMethod(NoCutoff), cutoffDistance(1.0), solventDielectric(78.3), soluteDielectric(1.0),
-               scalingMethod(NoScaling), alpha(1.0), beta(0.8), gamma(4.85), quinticLowerLimitFactor(0.8), quinticUpperBornRadiusLimit(5.0) {
+               scalingMethod(QuinticSpline), alpha(1.0), beta(0.8), gamma(4.85), quinticLowerLimitFactor(0.8), quinticUpperBornRadiusLimit(5.0) {
 }
@@ -106,19 +106,6 @@ void GBVISoftcoreForce::setQuinticUpperBornRadiusLimit(double inputQuinticUpperB
    quinticUpperBornRadiusLimit = inputQuinticUpperBornRadiusLimit;
 }
-void GBVISoftcoreForce::getTanhParameters( double& returnAlpha, double& returnBeta, double& returnGamma) const {
-    returnAlpha   = alpha;
-    returnBeta    = beta;
-    returnGamma   = gamma;
-    return;
-}
-void GBVISoftcoreForce::setTanhParameters(double inputAlpha, double inputBeta, double inputGamma){
-    alpha   = inputAlpha;
-    beta    = inputBeta;
-    gamma   = inputGamma;
-}
 int GBVISoftcoreForce::addBond(int particle1, int particle2, double bondLength) {
    bonds.push_back(BondInfo(particle1, particle2, bondLength));
    return bonds.size()-1;

--- a/plugins/freeEnergy/openmmapi/src/NonbondedSoftcoreForce.cpp
+++ b/plugins/freeEnergy/openmmapi/src/NonbondedSoftcoreForce.cpp
@@ -33,6 +33,7 @@
 #include "openmm/OpenMMException.h"
 #include "openmm/NonbondedSoftcoreForce.h"
 #include "openmm/internal/NonbondedSoftcoreForceImpl.h"
 #include <cmath>
 #include <map>
 #include <sstream>
@@ -73,15 +74,6 @@ void NonbondedSoftcoreForce::setReactionFieldDielectric(double dielectric) {
    rfDielectric = dielectric;
 }
-double NonbondedSoftcoreForce::getEwaldErrorTolerance( void ) const {
-    return ewaldErrorTol;
-}
-void NonbondedSoftcoreForce::setEwaldErrorTolerance(double tol)
-{
-    ewaldErrorTol = tol;
-}
 int NonbondedSoftcoreForce::addParticle(double charge, double sigma, double epsilon, double softcoreLJLambda) {
    particles.push_back(ParticleInfo(charge, sigma, epsilon, softcoreLJLambda));
    return particles.size()-1;

--- a/plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernelFactory.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernelFactory.cpp
@@ -27,6 +27,7 @@
 #include "CudaFreeEnergyKernelFactory.h"
 #include "CudaFreeEnergyKernels.h"
 #include "openmm/freeEnergyKernels.h"
+#include "FreeEnergyCudaData.h"
 #include "openmm/internal/ContextImpl.h"
 #include "openmm/OpenMMException.h"
 #include "kernels/GpuFreeEnergyCudaKernels.h"
@@ -51,18 +52,69 @@ extern "C" void registerKernelFactories() {
    }   
 }
+extern "C" OPENMMCUDA_EXPORT void registerFreeEnergyCudaKernelFactories( void ) { 
+    int hasCudaPlatform = 0;
+    for( int ii = 0; ii < Platform::getNumPlatforms() && hasCudaPlatform == 0; ii++ ){
+        Platform& platform = Platform::getPlatform(ii);
+        if( platform.getName() == "Cuda" ){
+            hasCudaPlatform = 1;
+        }   
+    }   
+    if( hasCudaPlatform == 0 ){
+        if (gpuIsAvailable() ){
+            Platform::registerPlatform(new CudaPlatform());
+        }   
+    }   
+    registerKernelFactories();
+}
+static std::map<ContextImpl*, FreeEnergyCudaData*> contextToFreeEnergyDataMap;
+// look up FreeEnergyCudaData for input contextImpl in contextToFreeEnergyDataMap
+extern "C" void* getFreeEnergyCudaData( ContextImpl& context ) { 
+    std::map<ContextImpl*, FreeEnergyCudaData*>::const_iterator mapIterator  = contextToFreeEnergyDataMap.find(&context);
+    if( mapIterator == contextToFreeEnergyDataMap.end() ){
+        return NULL;
+    } else {
+        return static_cast<void*>(mapIterator->second);
+    }   
+}
+// remove FreeEnergyCudaData from contextToFreeEnergyDataMap
+extern "C" void removeFreeEnergyCudaDataFromContextMap( void* inputContext ) { 
+    ContextImpl* context = static_cast<ContextImpl*>(inputContext);
+    contextToFreeEnergyDataMap.erase( context );
+    return;
+}
 KernelImpl* CudaFreeEnergyKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
-    CudaPlatform::PlatformData& data = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
+    // create FreeEnergyCudaData object if contextToFreeEnergyDataMap does not contain
+    // key equal to current context
+    FreeEnergyCudaData* freeEnergyCudaData;
+    std::map<ContextImpl*, FreeEnergyCudaData*>::const_iterator mapIterator  = contextToFreeEnergyDataMap.find(&context);
+    if( mapIterator == contextToFreeEnergyDataMap.end() ){
+        CudaPlatform::PlatformData& cudaPlatformData = *static_cast<CudaPlatform::PlatformData*>(context.getPlatformData());
+        freeEnergyCudaData                           = new FreeEnergyCudaData( cudaPlatformData );
+        contextToFreeEnergyDataMap[&context]         = freeEnergyCudaData;
+        //freeEnergyCudaData->setLog( stderr );
+        freeEnergyCudaData->setContextImpl( static_cast<void*>(&context) );
+    } else {
+        freeEnergyCudaData                           = mapIterator->second;
+    }
    if (name == CalcNonbondedSoftcoreForceKernel::Name())
-        return new CudaFreeEnergyCalcNonbondedSoftcoreForceKernel(name, platform, data, context.getSystem());
+        return new CudaFreeEnergyCalcNonbondedSoftcoreForceKernel(name, platform, *freeEnergyCudaData, context.getSystem());
    if (name == CalcGBSAOBCSoftcoreForceKernel::Name())
-        return new CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel(name, platform, data);
+        return new CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel(name, platform, *freeEnergyCudaData);
    if (name == CalcGBVISoftcoreForceKernel::Name())
-        return new CudaFreeEnergyCalcGBVISoftcoreForceKernel(name, platform, data);
+        return new CudaFreeEnergyCalcGBVISoftcoreForceKernel(name, platform, *freeEnergyCudaData);
    throw OpenMMException( (std::string("Tried to create kernel with illegal kernel name '") + name + "'").c_str() );
 }
--- a/plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
@@ -25,13 +25,14 @@
 * -------------------------------------------------------------------------- */
 #include "CudaFreeEnergyKernels.h"
+#include "CudaForceInfo.h"
 #include "openmm/Context.h"
 #include "openmm/OpenMMException.h"
 #include "openmm/internal/ContextImpl.h"
 #include "kernels/gputypes.h"
 #include "kernels/cudaKernels.h"
 #include "kernels/GpuFreeEnergyCudaKernels.h" 
-#include "kernels/GpuLJ14Softcore.h" 
 #include <cmath>
 #include <map>
@@ -201,13 +202,44 @@ static void getForceMap(const System& system, MapStringInt& forceMap, FILE* log)
     }
 }
+class CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::ForceInfo : public CudaForceInfo {
+public:
+    ForceInfo(const NonbondedSoftcoreForce& force) : force(force) {
+    }    
+    bool areParticlesIdentical(int particle1, int particle2) {
+        double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2, softcoreLJLambda1, softcoreLJLambda2;
+        force.getParticleParameters(particle1, charge1, sigma1, epsilon1, softcoreLJLambda1);
+        force.getParticleParameters(particle2, charge2, sigma2, epsilon2, softcoreLJLambda2);
+        return (charge1 == charge2 && sigma1 == sigma2 && epsilon1 == epsilon2 && softcoreLJLambda1 == softcoreLJLambda2);
+    }    
+    int getNumParticleGroups() {
+        return force.getNumExceptions();
+    }    
+    void getParticlesInGroup(int index, std::vector<int>& particles) {
+        int particle1, particle2;
+        double chargeProd, sigma, epsilon, softcoreLJLambda;
+        force.getExceptionParameters(index, particle1, particle2, chargeProd, sigma, epsilon, softcoreLJLambda);
+        particles.resize(2);
+        particles[0] = particle1;
+        particles[1] = particle2;
+    }    
+    bool areGroupsIdentical(int group1, int group2) {
+        int particle1, particle2;
+        double chargeProd1, chargeProd2, sigma1, sigma2, epsilon1, epsilon2, softcoreLJLambda1, softcoreLJLambda2;
+        force.getExceptionParameters(group1, particle1, particle2, chargeProd1, sigma1, epsilon1, softcoreLJLambda1);
+        force.getExceptionParameters(group2, particle1, particle2, chargeProd2, sigma2, epsilon2, softcoreLJLambda2);
+        return (chargeProd1 == chargeProd2 && sigma1 == sigma2 && epsilon1 == epsilon2 && softcoreLJLambda1 == softcoreLJLambda2);
+    }    
+private:
+    const NonbondedSoftcoreForce& force;
+};
 CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::~CudaFreeEnergyCalcNonbondedSoftcoreForceKernel() {
-    if( log ){
+    if( 0 && data.getLog() ){
-        (void) fprintf( log, "CudaFreeEnergyCalcNonbondedSoftcoreForceKernel destructor called.\n" );
+        (void) fprintf( data.getLog(), "~CudaFreeEnergyCalcNonbondedSoftcoreForceKernel called.\n" );
-        (void) fflush( log );
+        (void) fflush( data.getLog() );
    }
-    delete gpuNonbondedSoftcore;
+    data.decrementKernelCount();
-    delete gpuLJ14Softcore;
 }
 void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize(const System& system, const NonbondedSoftcoreForce& force) {
@@ -218,15 +250,15 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize(const System& sy
 // ---------------------------------------------------------------------------------------
-    if( log ){
+    if( data.getLog() ){
-        (void) fprintf( log, "%s called.\n", methodName.c_str() );
+        (void) fprintf( data.getLog(), "%s called.\n", methodName.c_str() );
-        (void) fflush( log );
+        (void) fflush( data.getLog() );
    }
    // check forces and relevant parameters
    MapStringInt forceMap;
-    getForceMap( system, forceMap, log);
+    getForceMap( system, forceMap, data.getLog() );
    int softcore        = 0;
    if( forceMap.find( GBSA_OBC_FORCE ) != forceMap.end() ){
@@ -253,7 +285,6 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize(const System& sy
    setIncludeSoftcore( softcore );
    numParticles      = force.getNumParticles();
-    _gpuContext* gpu  = data.gpu;
    // Identify which exceptions are 1-4 interactions.
@@ -270,7 +301,7 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize(const System& sy
    // Initialize nonbonded interactions.
-    {
+    if( numParticles > 0 ){
        std::vector<int> particle(numParticles);
        std::vector<float> c6(numParticles);
        std::vector<float> c12(numParticles);
@@ -295,89 +326,51 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::initialize(const System& sy
        }
        Vec3 boxVectors[3];
        system.getDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
-        gpuSetPeriodicBoxSize(gpu, static_cast<float>(boxVectors[0][0] ), static_cast<float>(boxVectors[1][1] ), static_cast<float>(boxVectors[2][2] ));
+        freeEnergyGpuSetPeriodicBoxSize( data.getFreeEnergyGpu(), static_cast<float>(boxVectors[0][0] ), static_cast<float>(boxVectors[1][1] ), static_cast<float>(boxVectors[2][2] ));
-        CudaNonbondedMethod method = NO_CUTOFF;
+        CudaFreeEnergyNonbondedMethod method = FREE_ENERGY_NO_CUTOFF;
        if (force.getNonbondedMethod() != NonbondedSoftcoreForce::NoCutoff) {
-            throw OpenMMException( "NonbondedSoftcoreForce currently only handles NoCutoff option." );
+            method = FREE_ENERGY_CUTOFF;
-            //gpuSetNonbondedCutoff(gpu, static_cast<float>(force.getCutoffDistance() ), force.getReactionFieldDielectric());
-            method = CUTOFF;
        }
        if (force.getNonbondedMethod() == NonbondedSoftcoreForce::CutoffPeriodic) {
-            method = PERIODIC;
+            method = FREE_ENERGY_PERIODIC;
        }
-        if (force.getNonbondedMethod() == NonbondedSoftcoreForce::Ewald || force.getNonbondedMethod() == NonbondedSoftcoreForce::PME) {
-            double ewaldErrorTol = force.getEwaldErrorTolerance();
-            double alpha = (1.0/force.getCutoffDistance())*std::sqrt(-std::log(ewaldErrorTol));
-            double mx = boxVectors[0][0]/force.getCutoffDistance();
-            double my = boxVectors[1][1]/force.getCutoffDistance();
-            double mz = boxVectors[2][2]/force.getCutoffDistance();
-            double pi = 3.1415926535897932385;
-            int kmaxx = (int)std::ceil(-(mx/pi)*std::log(ewaldErrorTol));
-            int kmaxy = (int)std::ceil(-(my/pi)*std::log(ewaldErrorTol));
-            int kmaxz = (int)std::ceil(-(mz/pi)*std::log(ewaldErrorTol));
-            if (force.getNonbondedMethod() == NonbondedSoftcoreForce::Ewald) {
-                if (kmaxx%2 == 0)
-                    kmaxx++;
-                if (kmaxy%2 == 0)
-                    kmaxy++;
-                if (kmaxz%2 == 0)
-                    kmaxz++;
-                //gpuSetEwaldParameters(gpu, static_cast<float>( alpha ), kmaxx, kmaxy, kmaxz);
-                method = EWALD;
-            }
-            else {
-                int gridSizeX = kmaxx*3;
-                int gridSizeY = kmaxy*3;
-                int gridSizeZ = kmaxz*3;
-                gridSizeX = ((gridSizeX+3)/4)*4;
-                gridSizeY = ((gridSizeY+3)/4)*4;
-                gridSizeZ = ((gridSizeZ+3)/4)*4;
-                //gpuSetPMEParameters(gpu, static_cast<float>( alpha ), gridSizeX, gridSizeY, gridSizeZ);
-                method = PARTICLE_MESH_EWALD;
-            }
-        }
-        data.nonbondedMethod = method;
        // setup parameters
-        gpuNonbondedSoftcore = gpuSetNonbondedSoftcoreParameters(gpu, 138.935485f, particle, c6, c12, q,
+        gpuSetNonbondedSoftcoreParameters( data.getFreeEnergyGpu(), 138.935485f, particle, c6, c12, q,
-                                                                 softcoreLJLambdaArray, symbol, exclusionList, method);
+                                           softcoreLJLambdaArray, symbol, exclusionList, method,
+                                           static_cast<float>(force.getCutoffDistance() ), static_cast<float>(force.getReactionFieldDielectric()));
-        // Compute the Ewald self energy.
-        data.ewaldSelfEnergy = 0.0;
-        if (force.getNonbondedMethod() == NonbondedSoftcoreForce::Ewald || force.getNonbondedMethod() == NonbondedSoftcoreForce::PME) {
-            double selfEnergyScale = gpu->sim.epsfac*gpu->sim.alphaEwald/std::sqrt(PI);
-                for (int i = 0; i < numParticles; i++)
-                    data.ewaldSelfEnergy -= selfEnergyScale*q[i]*q[i];
-        }
    }
    // Initialize 1-4 nonbonded interactions.
-    {
+    numExceptions = exceptions.size();
-        numExceptions = exceptions.size();
+    if( numExceptions > 0 ){
        std::vector<int> particle1(numExceptions);
        std::vector<int> particle2(numExceptions);
        std::vector<float> c6(numExceptions);
        std::vector<float> c12(numExceptions);
-        std::vector<float> q1(numExceptions);
+        std::vector<float> qProd(numExceptions);
-        std::vector<float> q2(numExceptions);
        std::vector<float> softcoreLJLambdaArray(numExceptions);
        for (int i = 0; i < numExceptions; i++) {
            double charge, sig, eps, softcoreLJLambda;
            force.getExceptionParameters(exceptions[i], particle1[i], particle2[i], charge, sig, eps, softcoreLJLambda);
            c6[i]                    = static_cast<float>( (4.0f*eps*powf(sig, 6.0f)) );
            c12[i]                   = static_cast<float>( (4.0f*eps*powf(sig, 12.0f)) );
-            q1[i]                    = static_cast<float>( charge );
+            qProd[i]                 = static_cast<float>( charge );
-            q2[i]                    = 1.0f;
            softcoreLJLambdaArray[i] = static_cast<float>( softcoreLJLambda );
        }
-        gpuLJ14Softcore = gpuSetLJ14SoftcoreParameters(gpu, 138.935485f, 1.0f, particle1, particle2, c6, c12, q1, q2, softcoreLJLambdaArray);
+        gpuSetLJ14SoftcoreParameters( data.getFreeEnergyGpu(), 138.935485f, particle1, particle2, c6, c12, qProd, softcoreLJLambdaArray);
+    } else if( data.getLog() ){
+        (void) fprintf( data.getLog(), "Mo nonbonded softcore exceptions.\n" );
+        (void) fflush( data.getLog() );
    }
+    data.getFreeEnergyGpu()->gpuContext->forces.push_back(new ForceInfo(force));
 }
-double CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
+double CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::execute( ContextImpl& context, bool includeForces, bool includeEnergy ){
 // ---------------------------------------------------------------------------------------
@@ -385,49 +378,22 @@ double CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::execute(ContextImpl& cont
 // ---------------------------------------------------------------------------------------
-    _gpuContext* gpu = data.gpu;
+    freeEnergyGpuContext gpu = data.getFreeEnergyGpu();
-    // write array, ... address's to board
-    if( setSim == 0 ){
+    data.initializeGpu( );
-        setSim++;
-        if( log ){
-            (void) fprintf( log, "%s Obc=%d GB/VI=%d exceptions=%d\n",
-                            methodName.c_str(), getIncludeGBSA(), getIncludeGBVI(), getNumExceptions() );
-            (void) fflush( log );
-        }
-        SetCalculateCDLJSoftcoreGpuSim( gpu );
-        SetCalculateLocalSoftcoreGpuSim( gpu );
-        // flip strides (unsure if this is needed)
-#if 0
-        (void) fprintf( stderr, "flipping gpuLJ14Softcore\n" ); fflush( stderr );
-        GpuLJ14Softcore* gpuLJ14Softcore = getGpuLJ14Softcore( );
-        if( gpuLJ14Softcore ){
-            gpuLJ14Softcore->flipStrides( gpu );
-            if( log ){
-                (void) fprintf( log, "CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces flipping LJ14\n" );
-                (void) fflush( log );
-            }
-        }
-#endif
-    }
    // calculate nonbonded ixns here, only if implicit solvent is inactive
    if ( !getIncludeGBSA() && !getIncludeGBVI() ) {
        kCalculateCDLJSoftcoreForces(gpu);
    }
    // local LJ-14 forces
-//kPrintForces( gpu, "Pre  kCalculateLocalSoftcoreForces ", call );
+    if( getNumExceptions() > 0 ){
-    kCalculateLocalSoftcoreForces(gpu);
+        kCalculateLocalSoftcoreForces(gpu);
-//kPrintForces( gpu, "Post kCalculateLocalSoftcoreForces ", call );
+    }
-//kPrintForces(gpu, "Post kCalculateLocalSoftcoreForces", call );
-//kReduceForces(gpu);
    return 0.0;
 }
@@ -459,27 +425,33 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::setIncludeSoftcore( int inp
    includeSoftcore = inputIncludeSoftcore;
 }
-GpuLJ14Softcore* CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::getGpuLJ14Softcore( void ) const {
+class CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::ForceInfo : public CudaForceInfo {
-    return gpuLJ14Softcore;
+public:
-}
+    ForceInfo(const GBSAOBCSoftcoreForce& force) : force(force) {
+    }
-void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::setGpuLJ14Softcore( GpuLJ14Softcore* inputGpuLJ14Softcore ){
+    bool areParticlesIdentical(int particle1, int particle2) {
-    gpuLJ14Softcore = inputGpuLJ14Softcore;
+        double charge1, charge2, radius1, radius2, scale1, scale2, particleNonPolarScalingFactor1, particleNonPolarScalingFactor2;
-}
+        force.getParticleParameters(particle1, charge1, radius1, scale1, particleNonPolarScalingFactor1);
+        force.getParticleParameters(particle2, charge2, radius2, scale2, particleNonPolarScalingFactor2);
+        return (charge1 == charge2 && radius1 == radius2 && scale1 == scale2 && particleNonPolarScalingFactor1 == particleNonPolarScalingFactor2);
+    }
+private:
+    const GBSAOBCSoftcoreForce& force;
+};
 CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::~CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel() {
-    delete gpuObcGbsaSoftcore;
+    if( 0 && data.getLog() ){
+        (void) fprintf( data.getLog(), "~CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel called.\n" );
+        (void) fflush( data.getLog() );
+    }
+    data.decrementKernelCount();
 }
 void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize(const System& system, const GBSAOBCSoftcoreForce& force) {
 // ---------------------------------------------------------------------------------------
-   //static const std::string methodName      = "CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize";
+    freeEnergyGpuContext gpu  = data.getFreeEnergyGpu();
-// ---------------------------------------------------------------------------------------
-    _gpuContext* gpu = data.gpu;
    MapStringInt forceMap;
    getForceMap( system, forceMap, log);
@@ -487,7 +459,7 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize(const System& syst
    // check that the (non-softcore) NonbondedForce is not active
    if( forceMap.find( NB_FORCE ) != forceMap.end() ){ 
-        throw OpenMMException( "Mixing NonbondedForce and GBSAOBCSoftoreForce not allowed -- use NonbondedSoftcoreForce " );
+        throw OpenMMException( "Mixing NonbondedForce and GBSAOBCSoftoreForce is not allowed -- use NonbondedSoftcoreForce " );
    }
    if( forceMap.find( NB_SOFTCORE_FORCE ) == forceMap.end() ){ 
        throw OpenMMException( "NonbondedSoftcore force must be included w/ GBSAOBCSoftcore force." );
@@ -500,99 +472,89 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize(const System& syst
    std::vector<float> charge(numParticles);
    std::vector<float> nonPolarScalingFactors(numParticles);
-    for (int i = 0; i < numParticles; i++) {
+    for( int ii = 0; ii < numParticles; ii++ ){
        double particleCharge, particleRadius, scalingFactor, particleNonPolarScalingFactor;
-        force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor, particleNonPolarScalingFactor);
+        force.getParticleParameters( ii, particleCharge, particleRadius, scalingFactor, particleNonPolarScalingFactor);
-        radius[i]                 = static_cast<float>( particleRadius);
+        radius[ii]                 = static_cast<float>( particleRadius);
-        scale[i]                  = static_cast<float>( scalingFactor);
+        scale[ii]                  = static_cast<float>( scalingFactor);
-        charge[i]                 = static_cast<float>( particleCharge);
+        charge[ii]                 = static_cast<float>( particleCharge);
-        nonPolarScalingFactors[i] = static_cast<float>( particleNonPolarScalingFactor);
+        nonPolarScalingFactors[ii] = static_cast<float>( particleNonPolarScalingFactor);
    }
-    gpuObcGbsaSoftcore = gpuSetObcSoftcoreParameters(gpu, static_cast<float>( force.getSoluteDielectric()),
+    gpuSetObcSoftcoreParameters( gpu, static_cast<float>( force.getSoluteDielectric()),
-                                                     static_cast<float>( force.getSolventDielectric()),
+                                 static_cast<float>( force.getSolventDielectric()),
-                                                     static_cast<float>( force.getNonPolarPrefactor()),
+                                 static_cast<float>( force.getNonPolarPrefactor()),
-                                                     radius, scale, charge, nonPolarScalingFactors );
+                                 radius, scale, charge, nonPolarScalingFactors );
-}
-double CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
+    data.getFreeEnergyGpu()->gpuContext->forces.push_back(new ForceInfo(force));
+    return;
-// ---------------------------------------------------------------------------------------
+}
-   static const std::string methodName      = "CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces";
+double CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
 // ---------------------------------------------------------------------------------------
-    _gpuContext* gpu = data.gpu;
+    freeEnergyGpuContext freeEnergyGpu = data.getFreeEnergyGpu();
+    gpuContext gpu                     = freeEnergyGpu->gpuContext;
-    int debug        = 1;
+    int call = 0; // call index forwarded to kPrintObcGbsaSoftcore() below; always 0 here
    // send address's of arrays, ... to device on first call
-    // required since force/energy buffers not set when CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize() was called
+    // required since force/energy buffers not set when CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize() was called
-    if( setSim == 0 ){
-       setSim++;
-       SetCalculateObcGbsaSoftcoreBornSumSim( gpu );
-       SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpu );
-       SetCalculateObcGbsaSoftcoreForces2Sim( gpu );
-    }
-    // required!!
-    gpu->bRecalculateBornRadii = true;
-    // calculate Born radii and first loop of Obc forces
+    data.initializeGpu( ); // NOTE(review): the replaced code also forced gpu->bRecalculateBornRadii = true ("required!!"); confirm that is no longer needed
-    if( debug && log ){
+    // (1) clear Born force array
-        if( log ){
+    // (2) calculate Born radii and sum
-            (void) fprintf( log, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1\n", methodName.c_str() );
+    // (3) loop 1
-            (void) fflush( log );
+    // (4) sum/calculate Born forces
-        }
+    // (5) loop 2
-    }
    kClearSoftcoreBornForces(gpu);
-    kCalculateObcGbsaSoftcoreBornSum(gpu);
+    kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpu );
    kReduceObcGbsaSoftcoreBornSum(gpu);
-    kCalculateCDLJObcGbsaSoftcoreForces1(gpu);
+    kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpu );
-//kPrintForces(gpu, "Post kCalculateCDLJObcGbsaSoftcoreForces1", call );
-    if( debug && log ){
-        (void) fprintf( log, "\n%s: calling kReduceObcGbsaBornForces\n", methodName.c_str()  );
-        (void) fflush( log );
-    }
-    // compute Born forces
+    // sum Born forces and execute second OBC loop
    kReduceObcGbsaSoftcoreBornForces(gpu);
+    kCalculateObcGbsaSoftcoreForces2( freeEnergyGpu );
-    if( debug && log ){
+    if( data.getLog() ){ // diagnostic dump only when a log stream has been set
-        (void) fprintf( log, "\n%s calling kCalculateObcGbsaForces2\n", methodName.c_str() );
+        kPrintObcGbsaSoftcore( freeEnergyGpu, "Post kCalculateObcGbsaSoftcoreForces2", call, data.getLog() );
-        (void) fflush( log );
    }
-    // second loop of Obc GBSA forces
-    kCalculateObcGbsaSoftcoreForces2(gpu);
    return 0.0;
 }
+class CudaFreeEnergyCalcGBVISoftcoreForceKernel::ForceInfo : public CudaForceInfo {
+public:
+    ForceInfo(const GBVISoftcoreForce& force) : force(force) {}
+    // True only when both particles carry exactly the same charge, radius, gamma and Born radius scale factor.
+    bool areParticlesIdentical(int particle1, int particle2) {
+        double q1, r1, g1, s1, q2, r2, g2, s2;
+        force.getParticleParameters(particle1, q1, r1, g1, s1);
+        force.getParticleParameters(particle2, q2, r2, g2, s2);
+        return !(q1 != q2 || r1 != r2 || g1 != g2 || s1 != s2);
+    }
+private:
+    const GBVISoftcoreForce& force; // force whose per-particle parameters are compared
+};
 CudaFreeEnergyCalcGBVISoftcoreForceKernel::~CudaFreeEnergyCalcGBVISoftcoreForceKernel() {
-    if( log ){
+    if( 0 && data.getLog() ){ // the constant 0 keeps this destructor trace switched off even when a log is set
-        (void) fprintf( log, "CudaFreeEnergyCalcGBVISoftcoreForceKernel destructor called -- freeing gpuGBVISoftcore.\n" );
+        (void) fprintf( data.getLog(), "~CudaFreeEnergyCalcGBVISoftcoreForceKernel called.\n" );
-        (void) fflush( log );
+        (void) fflush( data.getLog() );
    }
-    delete gpuGBVISoftcore;
+    data.decrementKernelCount(); // balances the incrementKernelCount() call made in this kernel's constructor
 }
 void CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize(const System& system, const GBVISoftcoreForce& force, const std::vector<double> & inputScaledRadii) {
 // ---------------------------------------------------------------------------------------
-   //static const std::string methodName      = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize";
+    int numParticles          = system.getNumParticles();
+    freeEnergyGpuContext gpu  = data.getFreeEnergyGpu();
-// ---------------------------------------------------------------------------------------
-    int numParticles = system.getNumParticles();
-    _gpuContext* gpu = data.gpu;
    // check forces and relevant parameters
@@ -611,7 +573,7 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize(const System& system,
    std::vector<float> gammas(numParticles);
    std::vector<float> bornRadiusScaleFactors(numParticles);
-    for (int i = 0; i < numParticles; i++) {
+    for( int i = 0; i < numParticles; i++ ){
        double charge, particleRadius, gamma, bornRadiusScaleFactor;
        force.getParticleParameters(i, charge, particleRadius, gamma, bornRadiusScaleFactor);
        particle[i]                  = i;
@@ -621,20 +583,8 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize(const System& system,
        bornRadiusScaleFactors[i]    = static_cast<float>( bornRadiusScaleFactor );
    }
-    // tanh not implemented
-//    std::vector<float> tanhScaleFactors;
    std::vector<float> quinticSplineParameters;
-    if( force.getBornRadiusScalingMethod() == GBVISoftcoreForce::Tanh ){
+    if( force.getBornRadiusScalingMethod() == GBVISoftcoreForce::QuinticSpline ){
-/*
-        double alpha, beta, gamma;
-        force.getTanhParameters( alpha, beta, gamma );
-        tanhScaleFactors.resize( 3 );
-        tanhScaleFactors[0] = static_cast<float>(alpha);
-        tanhScaleFactors[1] = static_cast<float>(beta);
-        tanhScaleFactors[2] = static_cast<float>(gamma);
-*/
-    } else if( force.getBornRadiusScalingMethod() == GBVISoftcoreForce::QuinticSpline ){
        // quintic spline
@@ -642,125 +592,62 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize(const System& system,
        quinticSplineParameters[0] = static_cast<float>(force.getQuinticLowerLimitFactor());
        quinticSplineParameters[1] = static_cast<float>(force.getQuinticUpperBornRadiusLimit());
        quinticSplineParameters[1] = powf( quinticSplineParameters[1], -3.0f ); 
-        setQuinticScaling( 1 );
+        quinticScaling =  1;
    }
    // load parameters onto board
    // defined in kCalculateGBVISoftcore.cu
-    gpuGBVISoftcore = gpuSetGBVISoftcoreParameters(gpu, static_cast<float>( force.getSoluteDielectric() ), static_cast<float>( force.getSolventDielectric() ),
+    gpuSetGBVISoftcoreParameters( gpu, static_cast<float>( force.getSoluteDielectric() ), static_cast<float>( force.getSolventDielectric() ),
-                                                   particle, radius, gammas, scaledRadii, bornRadiusScaleFactors, quinticSplineParameters);
+                                  particle, radius, gammas, scaledRadii, bornRadiusScaleFactors, quinticSplineParameters);
+    data.getFreeEnergyGpu()->gpuContext->forces.push_back(new ForceInfo(force));
+    return;
 }
 double CudaFreeEnergyCalcGBVISoftcoreForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
-// ---------------------------------------------------------------------------------------
+    freeEnergyGpuContext freeEnergyGpu = data.getFreeEnergyGpu();
+    gpuContext gpu                     = freeEnergyGpu->gpuContext;
-   static const std::string methodName      = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces";
-// ---------------------------------------------------------------------------------------
-    _gpuContext* gpu = data.gpu;
-    int debug        = 1;
    // send address's of arrays, ... to device on first call
    // required since force/energy buffers not set when CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize() was called
-    if( setSim == 0 ){
+    data.initializeGpu( ); // shared one-time GPU setup; does nothing further once FreeEnergyCudaData::gpuInitialized is set
-       setSim++;
-       SetCalculateGBVISoftcoreBornSumGpuSim( gpu );
-       SetCalculateObcGbsaSoftcoreBornSumSim( gpu );
-       SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpu );
-       SetCalculateGBVISoftcoreForces2Sim( gpu );
-    }
-    // calculate Born radii and first loop of GB/VI forces
+    // (1) clear Born force array
+    // (2) calculate Born radii and sum
+    // (3) loop 1
+    // (4) sum/calculate Born forces
+    // (5) loop 2
-    if( debug && log ){
+    // calculate Born radii and first loop of GB/VI forces
-        if( log ){
-            (void) fprintf( log, "\n%s: calling kCalculateCDLJObcGbsaSoftcoreForces1 & %s\n", methodName.c_str(),
-                            getQuinticScaling() ? "kReduceGBVIBornSumQuinticScaling" : "kReduceGBVIBornSum" );
-            (void) fflush( log );
-        }
-    }
-    // In kCalculateObcGbsaSoftcoreBornSum: SetCalculateObcGbsaSoftcoreBornSumSim
    kClearSoftcoreBornForces(gpu);
-    // In kCalculateGBVISoftcoreBornSum: SetCalculateGBVISoftcoreBornSumGpuSim
+    kCalculateGBVISoftcoreBornSum( freeEnergyGpu );
-    kCalculateGBVISoftcoreBornSum(gpu);
-    if( getQuinticScaling() ){
-        // kCalculateGBVISoftcoreBornSum.cu
-        kReduceGBVIBornSumQuinticScaling(gpu, gpuGBVISoftcore );
+    if( quinticScaling ){ // set to 1 by initialize() when the force uses GBVISoftcoreForce::QuinticSpline scaling
+        kReduceGBVIBornSumQuinticScaling( freeEnergyGpu );
    } else {
+        kReduceGBVISoftcoreBornSum( freeEnergyGpu );
-        // In kCalculateGBVISoftcoreBornSum.cu
-        kReduceGBVISoftcoreBornSum(gpu);
-    }
-    // In kCalculateCDLJObcGbsaSoftcoreForces1.cu
-    //    SetCalculateCDLJObcGbsaSoftcoreGpu1Sim
-    //    SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim (called in GpuNonbondedSoftcore.cpp)
-    kCalculateCDLJObcGbsaSoftcoreForces1(gpu);
-    if( debug && log ){
-        (void) fprintf( log, "\n%s: calling %s\n", methodName.c_str(),
-                        getQuinticScaling() ? "kReduceGBVIBornForcesQuinticScaling" : "kReduceObcGbsaBornForces" );
-        (void) fflush( log );
    }
-    // compute Born forces
+    kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpu );
-    if( getQuinticScaling() ){
-        // In kCalculateGBVISoftcoreBornSum.cu
+    if( quinticScaling ){ // the Born-force reduction must match the Born-sum reduction chosen above
+        kReduceGBVIBornForcesQuinticScaling(freeEnergyGpu);
-        kReduceGBVIBornForcesQuinticScaling(gpu);
    } else {
+        kReduceGBVISoftcoreBornForces( freeEnergyGpu );
-        // In kCalculateGBVISoftcoreBornSum.cu
-        kReduceGBVISoftcoreBornForces(gpu);
-    }
-    if( debug && log ){
-        (void) fprintf( log, "\n%s: calling kCalculateGBVIForces2\n", methodName.c_str() );
-        (void) fflush( log );
    }
    // second loop of GB/VI forces
-    // In kCalculateGBVISoftcoreForces2.cu (SetCalculateGBVISoftcoreForces2Sim)
+    kCalculateGBVISoftcoreForces2( freeEnergyGpu );
+    if( data.getLog() ){ // diagnostic dump only when a log stream has been set
+        kPrintGBVISoftcore( freeEnergyGpu, "Post kCalculateGBVISoftcoreForces2", 0, data.getLog() );
+    }
-    kCalculateGBVISoftcoreForces2(gpu);
-//kPrintForces( gpu, "Post GBVISoftcoreForces2", call );
    return 0.0;
 }
-int CudaFreeEnergyCalcGBVISoftcoreForceKernel::getQuinticScaling( void ) const {
-// ---------------------------------------------------------------------------------------
-   //static const std::string methodName      = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::getQuinticScaling";
-// ---------------------------------------------------------------------------------------
-    return quinticScaling;
-}
-void CudaFreeEnergyCalcGBVISoftcoreForceKernel::setQuinticScaling( int inputQuinticScaling) {
-// ---------------------------------------------------------------------------------------
-   //static const std::string methodName      = "CudaFreeEnergyCalcGBVISoftcoreForceKernel::setQuinticScaling";
-// ---------------------------------------------------------------------------------------
-    quinticScaling = inputQuinticScaling;
-}
--- a/plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.h
+++ b/plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.h
@@ -33,12 +33,7 @@
 #include "openmm/System.h"
 #include "OpenMMFreeEnergy.h"
 #include "openmm/freeEnergyKernels.h"
-#include "kernels/GpuNonbondedSoftcore.h"
+#include "FreeEnergyCudaData.h"
-#include "kernels/GpuLJ14Softcore.h"
-#include "kernels/GpuObcGbsaSoftcore.h"
-#include "kernels/GpuGBVISoftcore.h"
-//#define FreeEnergyDebug
 namespace OpenMM {
@@ -46,25 +41,22 @@ namespace OpenMM {
 * This kernel is invoked by NonbondedSoftcoreForce to calculate the forces acting on the system.
 */
 class CudaFreeEnergyCalcNonbondedSoftcoreForceKernel : public CalcNonbondedSoftcoreForceKernel {
 public:
-    CudaFreeEnergyCalcNonbondedSoftcoreForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
+    CudaFreeEnergyCalcNonbondedSoftcoreForceKernel(std::string name, const Platform& platform, FreeEnergyCudaData& data, System& system) :
             CalcNonbondedSoftcoreForceKernel(name, platform), data(data), system(system) {
-        gpuNonbondedSoftcore = NULL;
-        gpuLJ14Softcore      = NULL;
-#ifdef FreeEnergyDebug
-        log                  = stderr;
-#else
-        log                  = NULL;
-#endif
-        setSim               = 0;
        numExceptions        = 0;
        numParticles         = 0;
        bIncludeGBSA         = false;
        bIncludeGBVI         = false;
        includeSoftcore      = false;
+        log                  = NULL;
+        data.incrementKernelCount();
    }
    ~CudaFreeEnergyCalcNonbondedSoftcoreForceKernel();
    /**
     * Initialize the kernel.
     * 
@@ -123,30 +115,16 @@ public:
     * @return number of exceptions
     */
    int getNumExceptions( void ) const;
-    /**
-     * Get GpuLJ14Softcore
-     *
-     * @return GpuLJ14Softcore object
-     */
-    GpuLJ14Softcore* getGpuLJ14Softcore( void ) const;
-    /**
-     * Set GpuLJ14Softcore
-     *
-     * @param GpuLJ14Softcore object
-     */
-    void setGpuLJ14Softcore( GpuLJ14Softcore* gpuLJ14Softcore );
 private:
-    CudaPlatform::PlatformData& data;
+    FreeEnergyCudaData& data;
+    class ForceInfo;
    int numParticles;
    System& system;
-    GpuNonbondedSoftcore* gpuNonbondedSoftcore;
-    GpuLJ14Softcore* gpuLJ14Softcore;
    bool bIncludeGBSA;
    bool bIncludeGBVI;
    int includeSoftcore;
    int numExceptions;
    FILE* log;
-    int setSim;
 };
 /**
@@ -154,15 +132,10 @@ private:
 */
 class CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel : public CalcGBSAOBCSoftcoreForceKernel {
 public:
-    CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) :
+    CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel(std::string name, const Platform& platform, FreeEnergyCudaData& data) :
       CalcGBSAOBCSoftcoreForceKernel(name, platform), data(data) {
-#ifdef FreeEnergyDebug
-        log                  = stderr;
-#else
        log                  = NULL;
-#endif
+        data.incrementKernelCount();
-        setSim               = 0;
-        gpuObcGbsaSoftcore   = NULL;
    }
    ~CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel();
    /**
@@ -182,10 +155,9 @@ public:
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
 private:
-    CudaPlatform::PlatformData& data;
+    FreeEnergyCudaData& data;
+    class ForceInfo;
    FILE* log;
-    int setSim;
-    GpuObcGbsaSoftcore* gpuObcGbsaSoftcore;
 };
 /**
@@ -193,16 +165,12 @@ private:
 */
 class CudaFreeEnergyCalcGBVISoftcoreForceKernel : public CalcGBVISoftcoreForceKernel {
 public:
-    CudaFreeEnergyCalcGBVISoftcoreForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data) :
+    CudaFreeEnergyCalcGBVISoftcoreForceKernel(std::string name, const Platform& platform, FreeEnergyCudaData& data) :
         CalcGBVISoftcoreForceKernel(name, platform), data(data) {
-#ifdef FreeEnergyDebug
-        log                  = stderr;
-#else
        log                  = NULL;
-#endif
-        setSim               = 0;
        quinticScaling       = 0;
-        gpuGBVISoftcore      = NULL;
+        data.incrementKernelCount();
    }
    ~CudaFreeEnergyCalcGBVISoftcoreForceKernel();
@@ -224,25 +192,10 @@ public:
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
-    /**
-     * Apply quintic scaling for Born radii
-     * 
-     * @return nonzero value if scaling is to be applied
-     */
-    int getQuinticScaling(void) const;
-    /**
-     * Set flag for quintic scaling for Born radii
-     * 
-     * @param nonzero value if scaling is to be applied
-     */
-    void setQuinticScaling(int quinticScaling );
 private:
-    CudaPlatform::PlatformData& data;
+    FreeEnergyCudaData& data;
-    GpuGBVISoftcore* gpuGBVISoftcore;
+    class ForceInfo;
    FILE* log;
-    int setSim;
    int quinticScaling;
 };

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuGBVISoftcore.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuGBVISoftcore.h
-#ifndef OPENMM_FREE_ENERGY_GPU_GBVI_SOFTCORE_
+/* -------------------------------------------------------------------------- *
-#define OPENMM_FREE_ENERGY_GPU_GBVI_SOFTCORE_
+ *                               OpenMMFreeEnergy                                 *
-/* -------------------------------------------------------------------------- *
+ * -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
- * -------------------------------------------------------------------------- *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ *                                                                            *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ * Portions copyright (c) 2008-2009 Stanford University and the Authors.      *
- *                                                                            *
+ * Authors:                                                                   *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Contributors:                                                              *
- * Authors: Scott Le Grand, Peter Eastman                                     *
+ *                                                                            *
- * Contributors:                                                              *
+ * This program is free software: you can redistribute it and/or modify       *
- *                                                                            *
+ * it under the terms of the GNU Lesser General Public License as published   *
- * This program is free software: you can redistribute it and/or modify       *
+ * by the Free Software Foundation, either version 3 of the License, or       *
- * it under the terms of the GNU Lesser General Public License as published   *
+ * (at your option) any later version.                                        *
- * by the Free Software Foundation, either version 3 of the License, or       *
+ *                                                                            *
- * (at your option) any later version.                                        *
+ * This program is distributed in the hope that it will be useful,            *
- *                                                                            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * This program is distributed in the hope that it will be useful,            *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * GNU Lesser General Public License for more details.                        *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ *                                                                            *
- * GNU Lesser General Public License for more details.                        *
+ * You should have received a copy of the GNU Lesser General Public License   *
- *                                                                            *
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * You should have received a copy of the GNU Lesser General Public License   *
+ * -------------------------------------------------------------------------- */
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
+#include "FreeEnergyCudaData.h"
+#include "openmm/OpenMMException.h"
-#include "gputypes.h"
+#include <sstream>
-#include "cudatypes.h"
-#include "openmm/OpenMMException.h"
+extern "C" void removeFreeEnergyCudaDataFromContextMap( void* context ); 
-// info related to nonbonded softcore
+namespace OpenMM {
-class GpuGBVISoftcore {
+FreeEnergyCudaData::FreeEnergyCudaData( CudaPlatform::PlatformData& data ) : cudaPlatformData(data) {
-    public:
+    kernelCount                   = 0; // number of live kernels sharing this object; see increment/decrementKernelCount()
+    freeEnergyGpu                 = freeEnergyGpuInit( cudaPlatformData.gpu );
-   /** 
-     * This is an enumeration of the different methods that may be used for scaling of the Born radii.
+    log                           = NULL; // logging stays off until setLog() supplies a stream
-     */
+    contextImpl                   = NULL; // supplied later through setContextImpl()
-        /**
+    gpuInitialized                = false; // flipped to true by the first initializeGpu() call
-         * No scaling method is applied.
-         */
+    boxDimensions[0]              = 0.0;
-        static const int NoScaling          = 0;
+    boxDimensions[1]              = 0.0;
-        /**
+    boxDimensions[2]              = 0.0;
-         * Use the method outlined in Proteins 55, 383-394 (2004), Eq. 6
+}   
-         */
-        static const int Tanh               = 1;
+FreeEnergyCudaData::~FreeEnergyCudaData() {
-        /**
+    if( getLog() ){ // trace only when a log stream has been set
-         * Use quintic spline scaling function
+        (void) fprintf( getLog(), "~FreeEnergyCudaData called kernelCount=%d\n", kernelCount );
-         */
+        (void) fflush( getLog() );
-        static const int QuinticSpline      = 2;
+    }   
+    freeEnergyGpuShutDown( freeEnergyGpu ); // NOTE(review): decrementKernelCount() also calls freeEnergyGpuShutDown() on this handle; confirm a second shutdown is safe
-        GpuGBVISoftcore();
+}
-        ~GpuGBVISoftcore();
+/**
+ * Record that one kernel sharing this object has been destroyed.
+ *
+ * When the count reaches zero and a context has been registered through setContextImpl(),
+ * this object is removed from the context map and the free-energy GPU context is shut down.
+ * A call made while the count is already zero is ignored so the unsigned counter cannot wrap.
+ */
+void FreeEnergyCudaData::decrementKernelCount( void ) {
-        /** 
-         * Set softcore value
+    if( kernelCount == 0 ){
+        // unbalanced call: nothing left to release
+        return;
+    }
+    kernelCount--;
-         */
+    if( getLog() ){
+        (void) fprintf( getLog(), "FreeEnergyCudaData decrementKernelCount called. %d\n", kernelCount );
-        int setSoftCoreLambda( float softCoreLambda );
+        (void) fflush( getLog() );
+    }   
-        /** 
+    if( kernelCount == 0 && contextImpl != NULL ){
-         * Get softcore value
+        // NOTE(review): ~FreeEnergyCudaData also calls freeEnergyGpuShutDown(); confirm that
+        // removeFreeEnergyCudaDataFromContextMap() does not destroy this object and that a second shutdown is safe.
+        removeFreeEnergyCudaDataFromContextMap( contextImpl );
-         */
+        freeEnergyGpuShutDown( freeEnergyGpu );
+    }
-        float getSoftCoreLambda( void ) const;
+}
-        /** 
+void FreeEnergyCudaData::incrementKernelCount( void ) {
-         * Set quintic lower limit factor value
+    kernelCount++; // called from each free-energy kernel constructor; balanced by decrementKernelCount()
-         */
+}
-        int setQuinticLowerLimitFactor( float quinticLowerLimitFactor );
+freeEnergyGpuContext FreeEnergyCudaData::getFreeEnergyGpu( void ) const {
+    return freeEnergyGpu; // handle obtained from freeEnergyGpuInit() in the constructor
-        /** 
+}
-         * Get quintic lower limit factor value
-         */
+void FreeEnergyCudaData::setLog( FILE* inputLog ) {
+    log            = inputLog;
-        float getQuinticLowerLimitFactor( void ) const;
+    freeEnergyGpu->log = inputLog; // keep the GPU-side context's log in step with this object's
+}
-        /** 
-         * Set quintic upper limit value
+FILE* FreeEnergyCudaData::getLog( void ) const {
-         */
+    return log; // NULL (the constructor's initial value) until setLog() supplies a stream
+}
-        int setQuinticUpperLimit( float quinticUpperLimit );
+void FreeEnergyCudaData::setContextImpl( void* inputContextImpl ) {
-        /** 
+    contextImpl = inputContextImpl; // later handed to removeFreeEnergyCudaDataFromContextMap() by decrementKernelCount()
+}
-         */
+/**
+ * One-time GPU setup shared by the free-energy kernels.
+ *
+ * On the first call: checks that the softcore and non-softcore nonbonded cutoffs agree,
+ * builds the exclusion list, uploads the constants and marks the GPU as initialized.
+ * Later calls do nothing (the periodic-box refresh below is commented out).
+ *
+ * @throws OpenMMException if the two nonbonded cutoffs differ
+ */
+void FreeEnergyCudaData::initializeGpu( void ) {
-        float getQuinticUpperLimit( void ) const;
+    if( !gpuInitialized ){
-        /** 
-         * Get Born radii scaling method
+        gpuContext gpu = freeEnergyGpu->gpuContext;
-         */
+        if( freeEnergyGpu->freeEnergySim.nonbondedCutoff != gpu->sim.nonbondedCutoff ){
-        int getBornRadiiScalingMethod( void ) const;
+            std::stringstream msg;
+            msg << "The softcore non-bonded cutoff=" << freeEnergyGpu->freeEnergySim.nonbondedCutoff;
-        /** 
+            // leading space keeps the first cutoff value from running into the text that follows it
+            msg << " does not agree with the non-softcore cutoff=" << gpu->sim.nonbondedCutoff;
-         * Set Born radii scaling method
+            throw OpenMM::OpenMMException( msg.str() );
-         */
+        }
+/*
-        int setBornRadiiScalingMethod( int bornRadiiScalingMethod );
+        freeEnergyGpuBuildOutputBuffers( freeEnergyGpu, getHasFreeEnergyGeneralizedKirkwood() );
+        freeEnergyGpuBuildThreadBlockWorkList( freeEnergyGpu );
-        // initialize SoftCoreLJLambda particle array
+        boxDimensions[0] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeX;
-        int initializeGpuSwitchDerivative( unsigned int numberOfParticles );
+        boxDimensions[1] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeY;
+        boxDimensions[2] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeZ;
-       /** 
+*/
-         * Get address for switch derivative array
+        gpuBuildExclusionList( gpu );
-         * 
+        gpuSetConstants( gpu );
-         * @return address
+        freeEnergyGpuSetConstants( freeEnergyGpu );
-         */
+        gpuInitialized   = true;
-         float* getGpuSwitchDerivative( void ) const;
+        if( log ){
+            //gpuPrintCudaFreeEnergyGmxSimulation( freeEnergyGpu, getLog() );
-       /** 
+            (void) fprintf( log, "FreeEnergyCudaGpu initialized kernelCount=%d\n", kernelCount );
-         * Get switch derivative array
+            (void) fflush( log );
-         * 
+        }
-         * @return address
-         */
+    } else {
+/*
-         CUDAStream<float>* getSwitchDerivative( void ) const;
+        if( boxDimensions[0] != freeEnergyGpu->gpuContext->sim.periodicBoxSizeX ||
+            boxDimensions[1] != freeEnergyGpu->gpuContext->sim.periodicBoxSizeY ||
-        /** 
+            boxDimensions[2] != freeEnergyGpu->gpuContext->sim.periodicBoxSizeZ ){
-         * Upload data
+            freeEnergyGpuSetConstants( freeEnergyGpu, 1 );
-         * 
-         * @return 0 always
+            boxDimensions[0] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeX;
-         */
+            boxDimensions[1] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeY;
+            boxDimensions[2] = freeEnergyGpu->gpuContext->sim.periodicBoxSizeZ;
-        int upload( gpuContext gpu );
+        }
+*/
-    private:
-       float _quinticLowerLimitFactor;
+    }
-       float _quinticUpperLimit;
-       unsigned int _bornRadiiScalingMethod;
+    return;
-       CUDAStream<float>*  _psSwitchDerivative;
+}
-};
+}
-#endif // OPENMM_FREE_ENERGY_GPU_GBVI_SOFTCORE_
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuObcGbsaSoftcore.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuObcGbsaSoftcore.h
-#ifndef OPENMM_FREE_ENERGY_GPU_OBC_GBSA_SOFTCORE_
+#ifndef FREE_ENERGY_CUDA_DATA_H_
-#define OPENMM_FREE_ENERGY_GPU_OBC_GBSA_SOFTCORE_
+#define FREE_ENERGY_CUDA_DATA_H_
 /* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
+ *                              OpenMMFreeEnergy                                  *
 * -------------------------------------------------------------------------- *
 * This is part of the OpenMM molecular simulation toolkit originating from   *
 * Simbios, the NIH National Center for Physics-Based Simulation of           *
 * Biological Structures at Stanford, funded under the NIH Roadmap for        *
 * Medical Research, grant U54 GM072970. See https://simtk.org.               *
 *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Portions copyright (c) 2008 Stanford University and the Authors.           *
- * Authors: Scott Le Grand, Peter Eastman                                     *
+ * Authors:                                                                   *
 * Contributors:                                                              *
 *                                                                            *
 * This program is free software: you can redistribute it and/or modify       *
 * it under the terms of the GNU Lesser General Public License as published   *
 * by the Free Software Foundation, either version 3 of the License, or       *
 * (at your option) any later version.                                        *
 *                                                                            *
 * This program is distributed in the hope that it will be useful,            *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
 * GNU Lesser General Public License for more details.                        *
 *                                                                            *
 * You should have received a copy of the GNU Lesser General Public License   *
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */
-#include "gputypes.h"
+#include "CudaPlatform.h"
+#include "kernels/freeEnergyGpuTypes.h"
-// info related to nonbonded softcore
+#include "kernels/cudaKernels.h"
+#include "openmm/KernelImpl.h"
-class GpuObcGbsaSoftcore {
+namespace OpenMM {
-    public:
+/**
-        GpuObcGbsaSoftcore();
+ * Free energy Cuda data
-        ~GpuObcGbsaSoftcore();
+ */
+class FreeEnergyCudaData {
-        /** 
-         * Initialize NonPolarScalingFactors array
+public:
-         * 
-         * @param numberOfParticles number of particles
+    FreeEnergyCudaData( CudaPlatform::PlatformData& data );
-         *
+    ~FreeEnergyCudaData();
-         * @return 0 always
-         */
+    /**
+     * Increment kernel count
-        int initializeNonPolarScalingFactors( unsigned int numberOfParticles );
+     * 
+     */
-        /** 
+    void incrementKernelCount( void );
-         * Upload data
-         * 
+    /**
-         * @param implicitSolvent set if implicit solvent is included in system
+     * Decrement kernel count
-         *
+     * 
-         * @return 0 always
+     */
-         */
+    void decrementKernelCount( void );
-        int upload( gpuContext gpu );
+    /**
+     * Return freeEnergyGpuContext context
-        /** 
+     * 
-         * Set nonPolarScalingFactor entry
+     * @return freeEnergyGpuContext
-         * 
+     */
-         * @param particleIndex             index of particle
+    freeEnergyGpuContext OPENMMCUDA_EXPORT getFreeEnergyGpu( void ) const;
-         * @param nonPolarScalingFactor     nonPolarScalingFactor value
-         *
+    /**
-         * @return 0 always
+     * Set log file reference
-         */
+     * 
+     * @param log file reference; if not set, then no logging
-        int setNonPolarScalingFactors( unsigned int particleIndex, float nonPolarScalingFactor );
+     */
+    void setLog( FILE* inputLog );
-        /** 
-         * Get address for NonPolarScalingFactors array on board
+    /**
-         * 
+     * Get log file reference
-         * @return address
+     * 
-         */
+     * @return log file reference
+     */
-         float* getGpuNonPolarScalingFactors( void ) const;
+    FILE* getLog( void ) const;
-    private:
+    /**
+     * if gpuInitialized is false, write data to board
-       CUDAStream<float>*  _psNonPolarScalingFactors;
+     * 
+     * @throws OpenMMException if the softcore and non-softcore nonbonded cutoffs differ
-};
+     */
+    void initializeGpu( void ); 
-#endif // OPENMM_FREE_ENERGY_GPU_OBC_GBSA_SOFTCORE_
+    /**
+     * Set contextImpl
+     * 
+     * @param contextImpl reference
+     */
+    void setContextImpl( void* contextImpl ); 
+    CudaPlatform::PlatformData& cudaPlatformData;
+private:
+    freeEnergyGpuContext freeEnergyGpu;
+    unsigned int kernelCount;
+    void* contextImpl;
+    FILE* log;
+    bool gpuInitialized;
+    double boxDimensions[3];
+};
+} // namespace OpenMM
+#endif /*FREE_ENERGY_CUDA_DATA_H_*/
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
@@ -28,11 +28,8 @@
 * -------------------------------------------------------------------------- */
 #include "gputypes.h"
+#include "freeEnergyGpuTypes.h"
 #include "cudatypes.h"
-#include "GpuNonbondedSoftcore.h"
-#include "GpuLJ14Softcore.h"
-#include "GpuObcGbsaSoftcore.h"
-#include "GpuGBVISoftcore.h"
 #include <vector>
 #include <cuda.h>
@@ -43,78 +40,55 @@
 // setup methods called from CudaFreeEnergyKernels
 // nonbonded and 1-4 ixns
-extern "C"
+extern "C" bool gpuIsAvailableSoftcore();
-bool gpuIsAvailableSoftcore();
-extern "C" 
+extern "C" void freeEnergyGpuSetPeriodicBoxSize( freeEnergyGpuContext gpu, float xsize, float ysize, float zsize);
-GpuNonbondedSoftcore* gpuSetNonbondedSoftcoreParameters(gpuContext gpu, float epsfac, const std::vector<int>& atom, const std::vector<float>& c6,
-                                                        const std::vector<float>& c12, const std::vector<float>& q,
-                                                        const std::vector<float>& softcoreLJLambdaArray, const std::vector<char>& symbol,
-                                                        const std::vector<std::vector<int> >& exclusions, CudaNonbondedMethod method);
-extern "C"
+extern "C" void gpuSetNonbondedSoftcoreParameters( freeEnergyGpuContext gpu, float epsfac, const std::vector<int>& atom, const std::vector<float>& c6,
-GpuLJ14Softcore* gpuSetLJ14SoftcoreParameters(gpuContext gpu, float epsfac, float fudge, const std::vector<int>& atom1,
+                                                   const std::vector<float>& c12, const std::vector<float>& q,
-                                              const std::vector<int>& atom2, const std::vector<float>& c6, const std::vector<float>& c12,
+                                                   const std::vector<float>& softcoreLJLambdaArray, const std::vector<char>& symbol,
-                                              const std::vector<float>& q1, const std::vector<float>& q2, const std::vector<float>& softcoreLJLambdaArray);
+                                                   const std::vector<std::vector<int> >& exclusions, CudaFreeEnergyNonbondedMethod method,
+                                                   float cutoffDistance, float reactionFieldDielectric);
-// delete supplemtentary objects, ...
-extern "C"
+extern "C" void gpuSetLJ14SoftcoreParameters( freeEnergyGpuContext gpu, float epsfac, const std::vector<int>& atom1,
-void gpuDeleteNonbondedSoftcoreParameters( void* gpuNonbondedSoftcore);
+                                              const std::vector<int>& atom2, const std::vector<float>& c6, const std::vector<float>& c12,
+                                              const std::vector<float>& qProd, const std::vector<float>& softcoreLJLambdaArray);
 // write address's to device
-extern "C"
+extern "C" void SetCalculateCDLJSoftcoreGpuSim( freeEnergyGpuContext gpu );
-void SetCalculateCDLJSoftcoreGpuSim( gpuContext gpu );
-extern "C"
-void SetCalculateCDLJSoftcoreSupplementarySim( float* gpuParticleSoftCoreLJLambda);
-extern "C"
+extern "C" void SetCalculateLocalSoftcoreGpuSim( freeEnergyGpuContext gpu );
-void SetCalculateLocalSoftcoreGpuSim( gpuContext gpu );
 // kernel calls to device
-extern "C"
+extern "C" void kCalculateCDLJSoftcoreForces( freeEnergyGpuContext gpu );
-void kCalculateCDLJSoftcoreForces(gpuContext gpu );
-extern void kCalculateLocalSoftcoreForces( gpuContext gpu );
+extern "C" void kCalculateLocalSoftcoreForces( freeEnergyGpuContext gpu );
 // GB/VI softcore
 // setup method called from CudaFreeEnergyKernels
-extern "C" 
+extern "C" void gpuSetGBVISoftcoreParameters( freeEnergyGpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom, const std::vector<float>& radius, 
-GpuGBVISoftcore* gpuSetGBVISoftcoreParameters(gpuContext gpu, float innerDielectric, float solventDielectric, const std::vector<int>& atom, const std::vector<float>& radius, 
                                              const std::vector<float>& gamma, const std::vector<float>& scaledRadii,
                                              const std::vector<float>& bornRadiusScaleFactors, const std::vector<float>& quinticSplineParameters);
 // write address's to device
-extern "C"
+extern "C" void SetCalculateGBVISoftcoreForcesSim( gpuContext gpu, float* softCoreLJLambda);
-void SetCalculateGBVISoftcoreForcesSim( gpuContext gpu, float* softCoreLJLambda);
+extern "C" void SetCalculateGBVISoftcoreBornSumGpuSim( freeEnergyGpuContext gpu );
+extern "C" void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu);
-extern "C"
-void SetCalculateGBVISoftcoreBornSumGpuSim( gpuContext gpu);
-extern "C"
-void SetCalculateGBVISoftcoreSupplementarySim( GpuGBVISoftcore* gpuGBVISoftcore );
-extern "C"
-void SetCalculateGBVISoftcoreForces2Sim(gpuContext gpu);
-extern "C"
-void GetCalculateGBVISoftcoreForces2Sim(gpuContext gpu);
 // kernel calls to device
-extern void kReduceGBVIBornSumQuinticScaling( gpuContext gpu, GpuGBVISoftcore* gpuGBVISoftcore );
+extern void kReduceGBVIBornSumQuinticScaling( freeEnergyGpuContext gpu );
-extern void kCalculateGBVISoftcoreBornSum( gpuContext gpu );
+extern void kCalculateGBVISoftcoreBornSum( freeEnergyGpuContext gpu );
-extern void kReduceGBVIBornForcesQuinticScaling( gpuContext gpu );
+extern void kReduceGBVIBornForcesQuinticScaling( freeEnergyGpuContext gpu );
-extern void kCalculateGBVISoftcoreForces2( gpuContext gpu );
+extern void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext gpu );
-extern void kReduceGBVISoftcoreBornForces(gpuContext gpu);
+extern void kReduceGBVISoftcoreBornForces( freeEnergyGpuContext gpu);
-extern void kReduceGBVISoftcoreBornSum(gpuContext gpu);
+extern void kReduceGBVISoftcoreBornSum( freeEnergyGpuContext gpu);
-extern void kPrintGBVISoftcore(gpuContext gpu, GpuGBVISoftcore* gpuGBVISoftcore, std::string callId, int call);
+extern void kPrintGBVISoftcore( freeEnergyGpuContext gpu, std::string callId, int call, FILE* log);
 extern void kClearSoftcoreBornForces(gpuContext gpu);
@@ -135,53 +109,34 @@ extern void kClearSoftcoreBornForces(gpuContext gpu);
 *
 */
-extern "C" 
+extern "C" void gpuSetObcSoftcoreParameters( freeEnergyGpuContext gpu, float innerDielectric, float solventDielectric, float nonPolarPrefactor,
-GpuObcGbsaSoftcore* gpuSetObcSoftcoreParameters(gpuContext gpu, float innerDielectric, float solventDielectric, float nonPolarPrefactor,
+                                             const std::vector<float>& radius, const std::vector<float>& scale,
-                                                const std::vector<float>& radius, const std::vector<float>& scale,
+                                             const std::vector<float>& charge, const std::vector<float>& nonPolarScalingFactors);
-                                                const std::vector<float>& charge, const std::vector<float>& nonPolarScalingFactors);
-// delete supplemtentary objects, ...
-extern "C"
-void gpuDeleteObcSoftcoreParameters( void* gpuNonbondedSoftcore);
 // write address's to device
-extern "C"
+extern "C" void SetCalculateObcGbsaSoftcoreBornSumSim( freeEnergyGpuContext gpu );
-void SetCalculateObcGbsaSoftcoreBornSumSim( gpuContext gpu );
-extern "C"
-void SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsSim( float* nonPolarScalingFactors );
-extern "C"
-void SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim( float* nonPolarScalingFactors );
 // this method and kCalculateObcGbsaSoftcoreForces2() are being
 // used until changes in OpenMM version are made 
-extern "C"
+extern "C" void SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpuContext gpu );
-void SetCalculateObcGbsaSoftcoreForces2Sim( gpuContext gpu );
 // kernel calls to device
 extern void kClearObcGbsaSoftcoreBornSum( gpuContext gpu );
 extern void kReduceObcGbsaSoftcoreBornForces( gpuContext gpu );
-extern void kCalculateObcGbsaSoftcoreBornSum( gpuContext gpu );
+extern void kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpuContext gpu );
 extern void kReduceObcGbsaSoftcoreBornSum( gpuContext gpu );
 // this method is not needed; the OpenMM version can be used
-extern void kCalculateObcGbsaSoftcoreForces2( gpuContext gpu );
+extern void kCalculateObcGbsaSoftcoreForces2( freeEnergyGpuContext gpu );
-extern void kPrintForces( gpuContext gpu, std::string idString, int call );
+extern void kPrintObcGbsaSoftcore( freeEnergyGpuContext gpu, std::string callId, int call, FILE* log);
 // shared
-extern "C"
+extern "C" void SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( freeEnergyGpuContext gpu );
-void SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpuContext gpu );
+extern void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext gpu );
-extern "C"
-void SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim( float* gpuParticleSoftCoreLJLambda);
-extern void kCalculateCDLJObcGbsaSoftcoreForces1( gpuContext gpu );
+extern "C" void showWorkUnitsFreeEnergy( freeEnergyGpuContext freeEnergyGpu, int interactingWorkUnit );
 #endif //__GPU_FREE_ENERGY_KERNELS_H__
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuGBVISoftcore.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuGBVISoftcore.cpp
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Scott Le Grand, Peter Eastman                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include "GpuGBVISoftcore.h"
-#include "GpuFreeEnergyCudaKernels.h"
-// GpuGBVISoftcore constructor
-GpuGBVISoftcore::GpuGBVISoftcore( ){
-    _bornRadiiScalingMethod   = 0;
-    _quinticLowerLimitFactor  = 0.8f;
-    _quinticUpperLimit        = 0.008f;
-    _psSwitchDerivative       = NULL;
-}
-// GpuGBVISoftcore destructor
-GpuGBVISoftcore::~GpuGBVISoftcore( ){
-    delete _psSwitchDerivative;
-}
-// set quintic lower limit factor value
-int GpuGBVISoftcore::setQuinticLowerLimitFactor( float inputQuinticLowerLimitFactor ){
-    _quinticLowerLimitFactor = inputQuinticLowerLimitFactor;
-    return 0;
-}
-// get quintic lower limit factor value
-float GpuGBVISoftcore::getQuinticLowerLimitFactor( void ) const {
-    return _quinticLowerLimitFactor;
-}
-// set quintic upper limit value
-int GpuGBVISoftcore::setQuinticUpperLimit( float inputQuinticUpperLimit ){
-    _quinticUpperLimit = inputQuinticUpperLimit;
-    return 0;
-}
-// get quintic upper limit value
-float GpuGBVISoftcore::getQuinticUpperLimit( void ) const {
-    return _quinticUpperLimit;
-}
-// get Born radii scaling method
-int GpuGBVISoftcore::getBornRadiiScalingMethod( void ) const {
-    return _bornRadiiScalingMethod;
-}
-// set Born radii scaling method
-int GpuGBVISoftcore::setBornRadiiScalingMethod( int inputBornRadiiScalingMethod ){
-    _bornRadiiScalingMethod = inputBornRadiiScalingMethod;
-    return 0;
-}
-// get address for SwitchDerivative array on board
-float* GpuGBVISoftcore::getGpuSwitchDerivative( void ) const {
-    return _psSwitchDerivative->_pDevStream[0];
-}
-// get SwitchDerivative array 
-CUDAStream<float>* GpuGBVISoftcore::getSwitchDerivative( void ) const {
-    return _psSwitchDerivative;
-}
-// initialize SwitchDerivative array
-int GpuGBVISoftcore::initializeGpuSwitchDerivative( unsigned int numberOfParticles ){
-    _psSwitchDerivative = new CUDAStream<float>( numberOfParticles, 1, "SwitchDerivative");
-    for( unsigned int ii = 0; ii < numberOfParticles; ii++ ){
-        (*_psSwitchDerivative)[ii] = 1.0f;
-    }   
-    return 0;
-}
-// upload SoftCoreLambda array
-int GpuGBVISoftcore::upload( gpuContext gpu ){
-    if( getBornRadiiScalingMethod() > 0 ){
-        SetCalculateGBVISoftcoreSupplementarySim( this );
-    }
-    return 0;
-}
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuNonbondedSoftcore.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuNonbondedSoftcore.cpp
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Scott Le Grand, Peter Eastman                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include "GpuNonbondedSoftcore.h"
-#include "GpuFreeEnergyCudaKernels.h"
-// GpuNonbondedSoftcore constructor
-GpuNonbondedSoftcore::GpuNonbondedSoftcore( ){
-    _softcoreLJLambda     = 1.0f;
-    _psSoftcoreLJLambda   = NULL;
-}
-GpuNonbondedSoftcore::~GpuNonbondedSoftcore( ){
-    delete _psSoftcoreLJLambda;
-}
-// set global softCoreLJLambda
-int GpuNonbondedSoftcore::setSoftCoreLJLambda( float softCoreLJLambda ){
-    _softcoreLJLambda  = softCoreLJLambda;
-    return 0;
-}
-// get global softCoreLJLambda
-float GpuNonbondedSoftcore::getSoftCoreLJLambda( void ) const {
-    return _softcoreLJLambda;
-}
-// initialize SoftCoreLJLambda particle array
-int GpuNonbondedSoftcore::initializeParticleSoftCoreLJLambda( unsigned int numberOfParticles ){
-    _psSoftcoreLJLambda = new CUDAStream<float>( numberOfParticles, 1, "SoftcoreLJLambda");
-    for( unsigned int ii = 0; ii < numberOfParticles; ii++ ){
-        (*_psSoftcoreLJLambda)[ii] = 1.0f;
-    }
-    return 0;
-}
-// set entry in SoftCoreLJLambda particle array
-int GpuNonbondedSoftcore::setParticleSoftCoreLJLambda( unsigned int particleIndex, float softCoreLJLambda ){
-    (*_psSoftcoreLJLambda)[particleIndex] = softCoreLJLambda;
-    return 0;
-}
-// upload SoftCoreLJLambda array
-int GpuNonbondedSoftcore::upload( gpuContext gpu ){
-// ---------------------------------------------------------------------------------------
-   static const std::string methodName    = "GpuNonbondedSoftcore::upload";
-// ---------------------------------------------------------------------------------------
-    _psSoftcoreLJLambda->Upload();
-#define DUMP_PARAMETERS 0
-#if (DUMP_PARAMETERS == 1)
-    (void) fprintf( stderr, "%s %u %u\n", methodName.c_str(), gpu->natoms, gpu->sim.paddedNumberOfAtoms );
-    for (unsigned int ii = 0; ii < gpu->natoms; ii++)
-    {   
-       (void) fprintf( stderr, "%6u %13.6e\n", ii,  (*_psSoftcoreLJLambda)[ii] );
-    }
-#endif
-    SetCalculateCDLJSoftcoreSupplementarySim( getGpuParticleSoftCoreLJLambda() );
-    SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim( getGpuParticleSoftCoreLJLambda() );
-    return 0;
-}
-// get address for SoftCoreLJLambda particle array on board
-float* GpuNonbondedSoftcore::getGpuParticleSoftCoreLJLambda( void ) const {
-    return _psSoftcoreLJLambda->_pDevStream[0];
-}
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuNonbondedSoftcore.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuNonbondedSoftcore.h
-#ifndef OPENMM_FREE_ENERGY_GPU_NONBONDED_SOFTCORE_
-#define OPENMM_FREE_ENERGY_GPU_NONBONDED_SOFTCORE_
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Scott Le Grand, Peter Eastman                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include "gputypes.h"
-// info related to nonbonded softcore
-class GpuNonbondedSoftcore {
-    public:
-        GpuNonbondedSoftcore();
-        ~GpuNonbondedSoftcore();
-        /** 
-         * Set softcore value
-         */
-        int setSoftCoreLJLambda( float softCoreLJLambda );
-        /** 
-         * Get softcore value
-         */
-        float getSoftCoreLJLambda( void ) const;
-        /** 
-         * Initialize ParticleSoftCoreLJLambda array
-         * 
-         * @param numberOfParticles number of particles
-         *
-         * @return 0 always
-         */
-        int initializeParticleSoftCoreLJLambda( unsigned int numberOfParticles );
-        /** 
-         * Upload data
-         * 
-         * @param implicitSolvent set if implicit solvent is included in system
-         *
-         * @return 0 always
-         */
-        int upload( gpuContext gpu );
-        /** 
-         * Set particle softCoreLJLambda entry
-         * 
-         * @param particleIndex     index of particle
-         * @param softCoreLJLambda  softCoreLJLambda value
-         *
-         * @return 0 always
-         */
-        int setParticleSoftCoreLJLambda( unsigned int particleIndex, float softCoreLJLambda );
-        /** 
-         * Get address for SoftCoreLJLambda particle array on board
-         * 
-         * @return address
-         */
-         float* getGpuParticleSoftCoreLJLambda( void ) const;
-    private:
-       float _softcoreLJLambda;
-       CUDAStream<float>*  _psSoftcoreLJLambda;
-};
-#endif // OPENMM_FREE_ENERGY_GPU_NONBONDED_SOFTCORE_
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuObcGbsaSoftcore.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuObcGbsaSoftcore.cpp
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Scott Le Grand, Peter Eastman                                     *
- * Contributors:                                                              *
- *                                                                            *
- * This program is free software: you can redistribute it and/or modify       *
- * it under the terms of the GNU Lesser General Public License as published   *
- * by the Free Software Foundation, either version 3 of the License, or       *
- * (at your option) any later version.                                        *
- *                                                                            *
- * This program is distributed in the hope that it will be useful,            *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * GNU Lesser General Public License for more details.                        *
- *                                                                            *
- * You should have received a copy of the GNU Lesser General Public License   *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
- * -------------------------------------------------------------------------- */
-#include "GpuObcGbsaSoftcore.h"
-#include "GpuFreeEnergyCudaKernels.h"
-// GpuObcGbsaSoftcore constructor
-GpuObcGbsaSoftcore::GpuObcGbsaSoftcore( ){
-    _psNonPolarScalingFactors   = NULL;
-}
-GpuObcGbsaSoftcore::~GpuObcGbsaSoftcore( ){
-    delete _psNonPolarScalingFactors;
-}
-// initialize NonPolarScalingFactors array
-int GpuObcGbsaSoftcore::initializeNonPolarScalingFactors( unsigned int numberOfParticles ){
-    _psNonPolarScalingFactors = new CUDAStream<float>( numberOfParticles, 1, "ObcSoftcoreNonPolarScaling");
-    for( unsigned int ii = 0; ii < numberOfParticles; ii++ ){
-        (*_psNonPolarScalingFactors)[ii] = 1.0f;
-    }
-    return 0;
-}
-// set entry in NonPolarScalingFactors array
-int GpuObcGbsaSoftcore::setNonPolarScalingFactors( unsigned int particleIndex, float nonPolarScalingFactor ){
-    (*_psNonPolarScalingFactors)[particleIndex] = nonPolarScalingFactor;
-    return 0;
-}
-// upload NonPolarScalingFactors array
-int GpuObcGbsaSoftcore::upload( gpuContext gpu ){
-    _psNonPolarScalingFactors->Upload();
-    SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsSim( getGpuNonPolarScalingFactors() );
-    SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim( getGpuNonPolarScalingFactors() );
-    return 0;
-}
-// get address for NonPolarScalingFactors array on board
-float* GpuObcGbsaSoftcore::getGpuNonPolarScalingFactors( void ) const {
-    return _psNonPolarScalingFactors->_pDevStream[0];
-}
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyCudaGpu.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/freeEnergyCudaGpu.cpp
+/* -------------------------------------------------------------------------- *
+ *                               OpenMMFreeEnergy                             *
+ * -------------------------------------------------------------------------- *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Authors: Scott Le Grand, Peter Eastman, Mark Friedrichs                    *
+ * Contributors:                                                              *
+ *                                                                            *
+ * Permission is hereby granted, free of charge, to any person obtaining a    *
+ * copy of this software and associated documentation files (the "Software"), *
+ * to deal in the Software without restriction, including without limitation  *
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
+ * and/or sell copies of the Software, and to permit persons to whom the      *
+ * Software is furnished to do so, subject to the following conditions:       *
+ *                                                                            *
+ * The above copyright notice and this permission notice shall be included in *
+ * all copies or substantial portions of the Software.                        *
+ *                                                                            *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
+ * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
+ * -------------------------------------------------------------------------- */
+#ifdef WIN32
+  #define _USE_MATH_DEFINES /* M_PI */
+#endif
+#define PARAMETER_PRINT 1
+#define MAX_PARAMETER_PRINT 10
+#include "openmm/OpenMMException.h"
+#include "cudaKernels.h"
+#include "GpuFreeEnergyCudaKernels.h"
+#include "freeEnergyGpuTypes.h"
+// for some reason, these are not being included w/ cudaKernels.h on Windows
+//extern void OPENMMCUDA_EXPORT SetCalculateObcGbsaForces2Sim(gpuContext gpu);
+extern void OPENMMCUDA_EXPORT SetForcesSim(gpuContext gpu);
+#include <cmath>
+#include <sstream>
+#include <limits>
+#include <cstring>
+#include <vector>
+#include <stdio.h>
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+using std::vector;
+// Create a free-energy gpu context that wraps an existing gpu context.
+// The new struct is byte-zeroed, then its gpuContext member is pointed at 'gpu'.
+// The caller receives the heap-allocated context.
+extern "C"
+freeEnergyGpuContext freeEnergyGpuInit( _gpuContext* gpu ){
+    freeEnergyGpuContext context = new _freeEnergyGpuContext;
+    // clear every byte before wiring in the parent context
+    memset( context, 0, sizeof( _freeEnergyGpuContext ) );
+    context->gpuContext = gpu;
+    return context;
+}
+/**
+ * Print a one-line summary of a CUDA stream to a log file
+ *
+ * The line contains the name, the four unsigned counters (in the order
+ * length, subStreams, stride, memoryFootprint) and the four pointers
+ * (system stream, device stream, system data, device data).
+ *
+ * @param name             label printed at the start of the line
+ * @param length           first value in the bracketed group
+ * @param subStreams       second value in the bracketed group
+ * @param stride           third value in the bracketed group
+ * @param memoryFootprint  fourth value in the bracketed group
+ * @param pSysStream       pointer printed first in the Stream[] group
+ * @param pDevStream       pointer printed second in the Stream[] group
+ * @param pSysData         pointer printed first in the Data[] group
+ * @param pDevData         pointer printed second in the Data[] group
+ * @param log              file to write to; if NULL, nothing is printed
+ */
+extern "C"
+void gpuPrintCudaStream( std::string name,
+                         unsigned int length, unsigned int subStreams, unsigned int stride,
+                         unsigned int memoryFootprint,
+                         void*  pSysStream, void* pDevStream,
+                         void*  pSysData,   void* pDevData, FILE* log)
+{
+    // logging is optional: passing a NULL FILE* to fprintf is undefined behavior,
+    // so treat an unset log as "no logging" instead of crashing
+    if( log == NULL ){
+        return;
+    }
+    (void) fprintf( log, "     %-35s [%8u %5u %8u %8u] Stream[%p %p] Data[%16p %16p]\n",
+                    name.c_str(), length, subStreams,
+                    stride, memoryFootprint, pSysStream, pDevStream, pSysData, pDevData );
+}
+// Print the bookkeeping fields of a float stream to 'log' via gpuPrintCudaStream();
+// returns _length*_subStreams*sizeof(float), or 0 when the stream pointer is NULL
+extern "C"
+int gpuPrintCudaStreamFloat( CUDAStream<float>* cUDAStream, FILE* log )
+{
+    int streamBytes = 0;
+    if( cUDAStream != NULL ){
+        CUDAStream<float>& stream = *cUDAStream;
+        gpuPrintCudaStream( stream._name.c_str(),
+                            stream._length, stream._subStreams, stream._stride,
+                            stream._length*stream._subStreams*sizeof( float ),
+                            static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                            static_cast<void*>(stream._pSysData), static_cast<void*>(stream._pDevData), log );
+        streamBytes = stream._length*stream._subStreams*sizeof( float );
+    }
+    return streamBytes;
+}
+// Print the bookkeeping fields of a float2 stream to 'log' via gpuPrintCudaStream();
+// returns _length*_subStreams*2*sizeof(float), or 0 when the stream pointer is NULL
+extern "C"
+int gpuPrintCudaStreamFloat2( CUDAStream<float2>* cUDAStream, FILE* log )
+{
+    int streamBytes = 0;
+    if( cUDAStream != NULL ){
+        CUDAStream<float2>& stream = *cUDAStream;
+        gpuPrintCudaStream( stream._name.c_str(),
+                            stream._length, stream._subStreams, stream._stride,
+                            stream._length*stream._subStreams*sizeof( float2 ),
+                            static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                            static_cast<void*>(stream._pSysData), static_cast<void*>(stream._pDevData), log );
+        streamBytes = stream._length*stream._subStreams*2*sizeof( float );
+    }
+    return streamBytes;
+}
+// Print the bookkeeping fields of a float4 stream to 'log' via gpuPrintCudaStream();
+// returns _length*_subStreams*4*sizeof(float), or 0 when the stream pointer is NULL
+extern "C"
+int gpuPrintCudaStreamFloat4( CUDAStream<float4>* cUDAStream, FILE* log )
+{
+    int streamBytes = 0;
+    if( cUDAStream != NULL ){
+        CUDAStream<float4>& stream = *cUDAStream;
+        gpuPrintCudaStream( stream._name.c_str(),
+                            stream._length, stream._subStreams, stream._stride,
+                            stream._length*stream._subStreams*sizeof( float4 ),
+                            static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                            static_cast<void*>(stream._pSysData), static_cast<void*>(stream._pDevData), log );
+        streamBytes = stream._length*stream._subStreams*4*sizeof( float );
+    }
+    return streamBytes;
+}
+// Print the bookkeeping fields of an unsigned int stream to 'log' via gpuPrintCudaStream();
+// returns _length*_subStreams*sizeof(unsigned int), or 0 when the stream pointer is NULL
+extern "C"
+int gpuPrintCudaStreamUnsignedInt( CUDAStream<unsigned int>* cUDAStream, FILE* log )
+{
+    int streamBytes = 0;
+    if( cUDAStream != NULL ){
+        CUDAStream<unsigned int>& stream = *cUDAStream;
+        gpuPrintCudaStream( stream._name.c_str(),
+                            stream._length, stream._subStreams, stream._stride,
+                            stream._length*stream._subStreams*sizeof( unsigned int ),
+                            static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                            static_cast<void*>(stream._pSysData), static_cast<void*>(stream._pDevData), log );
+        streamBytes = stream._length*stream._subStreams*sizeof( unsigned int );
+    }
+    return streamBytes;
+}
+/**
+ * Describe an int CUDA stream by forwarding its bookkeeping fields to gpuPrintCudaStream().
+ *
+ * @param cUDAStream  stream to describe; a NULL stream is ignored
+ * @param log         file handed through to gpuPrintCudaStream()
+ *
+ * @return 0 for a NULL stream, otherwise _length*_subStreams*sizeof( int )
+ */
+extern "C"
+int gpuPrintCudaStreamInt( CUDAStream<int>* cUDAStream, FILE* log )
+{
+    if( cUDAStream == NULL ){
+        return 0;
+    }
+
+    CUDAStream<int>& stream = *cUDAStream;
+
+    gpuPrintCudaStream( stream._name.c_str(), stream._length, stream._subStreams, stream._stride,
+                        stream._length*stream._subStreams*sizeof( int ),
+                        static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                        static_cast<void*>(stream._pSysData),   static_cast<void*>(stream._pDevData), log );
+
+    return stream._length*stream._subStreams*sizeof( int );
+}
+/**
+ * Describe an int2 CUDA stream by forwarding its bookkeeping fields to gpuPrintCudaStream().
+ *
+ * @param cUDAStream  stream to describe; a NULL stream is ignored
+ * @param log         file handed through to gpuPrintCudaStream()
+ *
+ * @return 0 for a NULL stream, otherwise _length*_subStreams*2*sizeof( int )
+ */
+extern "C"
+int gpuPrintCudaStreamInt2( CUDAStream<int2>* cUDAStream, FILE* log )
+{
+    if( cUDAStream == NULL ){
+        return 0;
+    }
+
+    CUDAStream<int2>& stream = *cUDAStream;
+
+    // size in bytes reported for the stream
+    const size_t streamBytes = stream._length*stream._subStreams*sizeof( int2 );
+
+    gpuPrintCudaStream( stream._name.c_str(), stream._length, stream._subStreams, stream._stride, streamBytes,
+                        static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                        static_cast<void*>(stream._pSysData),   static_cast<void*>(stream._pDevData), log );
+
+    return stream._length*stream._subStreams*2*sizeof( int );
+}
+/**
+ * Describe an int4 CUDA stream by forwarding its bookkeeping fields to gpuPrintCudaStream().
+ *
+ * @param cUDAStream  stream to describe; a NULL stream is ignored
+ * @param log         file handed through to gpuPrintCudaStream()
+ *
+ * @return 0 for a NULL stream, otherwise _length*_subStreams*4*sizeof( int )
+ */
+extern "C"
+int gpuPrintCudaStreamInt4( CUDAStream<int4>* cUDAStream, FILE* log )
+{
+    if( cUDAStream == NULL ){
+        return 0;
+    }
+
+    CUDAStream<int4>& stream = *cUDAStream;
+
+    // size in bytes reported for the stream
+    const size_t streamBytes = stream._length*stream._subStreams*sizeof( int4 );
+
+    gpuPrintCudaStream( stream._name.c_str(), stream._length, stream._subStreams, stream._stride, streamBytes,
+                        static_cast<void*>(stream._pSysStream), static_cast<void*>(stream._pDevStream),
+                        static_cast<void*>(stream._pSysData),   static_cast<void*>(stream._pDevData), log );
+
+    return stream._length*stream._subStreams*4*sizeof( int );
+}
+/**
+ * Write a summary of the free energy GPU context to a log file: atom counts, launch
+ * configuration and the energy/force streams of the wrapped gpuContext.
+ *
+ * Nothing is written if log, freeEnergyGpu or the wrapped gpuContext is NULL.
+ *
+ * @param freeEnergyGpu  free energy gpu context to describe
+ * @param log            file to write to
+ */
+extern "C"
+void gpuPrintCudaFreeEnergyGmxSimulation(freeEnergyGpuContext freeEnergyGpu, FILE* log )
+{
+    // nowhere to report to, or nothing to report on
+    if( log == NULL || freeEnergyGpu == NULL || freeEnergyGpu->gpuContext == NULL )return;
+
+    _gpuContext* gpu                            = freeEnergyGpu->gpuContext;
+    int totalMemory                             = 0;
+
+    (void) fprintf( log, "cudaFreeEnergyGmxSimulation:\n\n" );
+    (void) fprintf( log, "\n" );
+    (void) fprintf( log, "     numberOfAtoms                      %u\n",      gpu->natoms );
+    (void) fprintf( log, "     paddedNumberOfAtoms                %u\n",      gpu->sim.paddedNumberOfAtoms );
+    (void) fprintf( log, "\n\n" );
+
+    // %p expects a void*, so the pointers are cast explicitly
+    (void) fprintf( log, "     gpuContext                         %p\n",      static_cast<void*>(gpu) );
+    (void) fprintf( log, "     log                                %p %s\n",   static_cast<void*>(freeEnergyGpu->log),
+                                                                              freeEnergyGpu->log == stderr ? "is stderr" : "is not stderr");
+    (void) fprintf( log, "     sm_version                         %u\n",      gpu->sm_version );
+    (void) fprintf( log, "     device                             %u\n",      gpu->device );
+    (void) fprintf( log, "     sharedMemoryPerBlock               %u\n",      gpu->sharedMemoryPerBlock );
+    (void) fprintf( log, "     bOutputBufferPerWarp               %d\n",      gpu->bOutputBufferPerWarp );
+    (void) fprintf( log, "     blocks                             %u\n",      gpu->sim.blocks );
+    (void) fprintf( log, "     threads_per_block                  %u\n",      gpu->sim.threads_per_block);
+    (void) fprintf( log, "     update_threads_per_block           %u\n",      gpu->sim.update_threads_per_block);
+    (void) fprintf( log, "     nonbondBlocks                      %u\n",      gpu->sim.nonbond_blocks );
+    (void) fprintf( log, "     nonbondThreadsPerBlock             %u\n",      gpu->sim.nonbond_threads_per_block);
+    (void) fprintf( log, "     bsf_reduce_threads_per_block       %u\n",      gpu->sim.bsf_reduce_threads_per_block);
+    (void) fprintf( log, "     nonbondOutputBuffers               %u\n",      gpu->sim.nonbondOutputBuffers );
+    (void) fprintf( log, "     outputBuffers                      %u\n",      gpu->sim.outputBuffers );
+
+    // each stream printer returns the byte count it reported for its stream
+    totalMemory += gpuPrintCudaStreamFloat(  gpu->psEnergy,    log );
+    totalMemory += gpuPrintCudaStreamFloat4( gpu->psForce4,    log );
+
+    // the sum was previously accumulated and then discarded
+    (void) fprintf( log, "     totalMemory                        %d\n",      totalMemory );
+    (void) fflush( log );
+}
+/**
+ * Release a free energy GPU context: the CUDA streams it holds and the context object itself.
+ * The wrapped gpuContext is not released here.
+ *
+ * @param freeEnergyGpu  context to release; NULL is accepted and ignored
+ */
+extern "C"
+void freeEnergyGpuShutDown( freeEnergyGpuContext freeEnergyGpu ){
+
+    // tolerate a context that was never created instead of dereferencing NULL
+    if( freeEnergyGpu == NULL ){
+        return;
+    }
+
+    if( freeEnergyGpu->log ){
+        (void) fprintf( freeEnergyGpu->log, "freeEnergyGpuShutDown called.\n" );
+        (void) fflush( freeEnergyGpu->log );
+    }
+
+    // free free energy Cuda arrays; delete on a NULL stream pointer is a no-op
+    delete freeEnergyGpu->psLJ14ID;
+    delete freeEnergyGpu->psLJ14Parameter;
+    delete freeEnergyGpu->psSigEps4;
+    delete freeEnergyGpu->psSwitchDerivative;
+    delete freeEnergyGpu->psNonPolarScalingFactors;
+
+    delete freeEnergyGpu;
+}
+/**
+ * Invoke the Set...Sim routine of every free energy kernel group, in a fixed order,
+ * passing each one the free energy context.
+ *
+ * @param freeEnergyGpu  free energy gpu context handed to each routine
+ */
+extern "C"
+void freeEnergyGpuSetConstants( freeEnergyGpuContext freeEnergyGpu ){
+
+    FILE* const logFile = freeEnergyGpu->log;
+    if( logFile != NULL ){
+        (void) fprintf( logFile, "FreeEnergyGpuSetConstants called\n" );
+        (void) fflush( logFile );
+    }
+
+    // local (bonded) and direct-space nonbonded softcore kernels
+    SetCalculateLocalSoftcoreGpuSim( freeEnergyGpu );
+    SetCalculateCDLJSoftcoreGpuSim( freeEnergyGpu );
+
+    // GB/VI softcore kernels
+    SetCalculateGBVISoftcoreBornSumGpuSim( freeEnergyGpu );
+    SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpu );
+
+    // OBC softcore kernels
+    SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( freeEnergyGpu );
+    SetCalculateObcGbsaSoftcoreBornSumSim( freeEnergyGpu );
+    SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpu );
+}
+/**
+ * Unpack a work unit code into its tile indices and exclusion flag.
+ *
+ * Layout read here: bits 17 and up hold x, bits 2-16 hold y, bit 0 is the exclusion flag
+ * (bit 1 is not read). The code is decoded as an unsigned value so that a set top bit
+ * is not sign-extended into x by the right shift.
+ *
+ * @param cellCode   packed work unit code
+ * @param x          output: x tile index
+ * @param y          output: y tile index
+ * @param exclusion  output: 1 if bit 0 of the code is set, otherwise 0
+ *
+ * @return always 0
+ */
+static int decodeCell( int cellCode, unsigned int* x, unsigned int* y, unsigned int* exclusion ){
+    const unsigned int code = static_cast<unsigned int>( cellCode );
+    *x         =  code >> 17;
+    *y         = (code >> 2 ) & 0x7FFF;
+    *exclusion =  code & 1;
+    return 0;
+}
+/**
+ * Debugging aid: write to stderr the work units assigned to the warps.
+ *
+ * The work unit, interacting work unit and interaction flag streams are downloaded from the
+ * device first. For every nonbonded block, the range of work units [pos, end) belonging to the
+ * warp that contains thread 1 of that block is decoded with decodeCell() and reported,
+ * one line per work unit.
+ *
+ * @param freeEnergyGpu        free energy gpu context
+ * @param interactingWorkUnit  if nonzero, report the interacting work units together with their
+ *                             interaction flags; otherwise report the entries of the work unit list
+ */
+void showWorkUnitsFreeEnergy( freeEnergyGpuContext freeEnergyGpu, int interactingWorkUnit ){
+
+    // only the warp containing this thread of each block is reported
+    const unsigned int reportedThread  = 1;
+
+    gpuContext gpu = freeEnergyGpu->gpuContext;
+    gpu->psWorkUnit->Download();
+    gpu->psInteractingWorkUnit->Download();
+    gpu->psInteractionFlag->Download();
+
+    const unsigned int blocks          = gpu->sim.nonbond_blocks;
+    const unsigned int threadsPerBlock = gpu->sim.nonbond_threads_per_block;
+    const unsigned int totalWarps      = (blocks*threadsPerBlock)/GRID;
+
+    // NOTE(review): psInteractionCount is read without a Download() in this function;
+    // confirm the host copy is current when this is called
+    const unsigned int numWorkUnits    = gpu->psInteractionCount->_pSysData[0];
+
+    (void) fprintf( stderr, "Total warps=%u blocks=%u threads=%u GRID=%u wus=%u\n",
+                    totalWarps, blocks, threadsPerBlock, GRID, numWorkUnits );
+
+    std::stringstream message;
+    char buffer[2048];
+
+    // totalWarps == 0 would divide by zero below; blocks without thread 'reportedThread' report nothing
+    if( totalWarps > 0 && threadsPerBlock > reportedThread ){
+        for( unsigned int ii = 0; ii < blocks; ii++ ){
+
+            // slice of the work unit list handled by this warp
+            const unsigned int warp = (ii*threadsPerBlock + reportedThread)/GRID;
+            const unsigned int end  = (warp+1)*numWorkUnits/totalWarps;
+
+            for( unsigned int pos = warp*numWorkUnits/totalWarps; pos < end; pos++ ){
+
+                unsigned int x, y, exclusion;
+                unsigned int flags = 0;
+                int flagInt        = -1;
+                if( interactingWorkUnit ){
+                    decodeCell( gpu->psInteractingWorkUnit->_pSysData[pos], &x, &y, &exclusion );
+                    flags   = gpu->psInteractionFlag->_pSysData[pos];
+                    flagInt = (flags == 0xFFFFFFFF) ? -2 : static_cast<int>( flags );
+                } else {
+                    decodeCell( gpu->psWorkUnit->_pSysData[pos], &x, &y, &exclusion );
+                }
+
+                // convert tile indices to atom indices
+                x *= GRID;
+                y *= GRID;
+
+                (void) sprintf( buffer, "Block %4u thread %4u warp=%4u pos[%4u %4u] ", ii, reportedThread, warp, pos, end );
+                message << buffer;
+                (void) sprintf( buffer, "    x[%4u %4u] y[%4u %4u] excl=%u", x, x+GRID, y, y+GRID, exclusion );
+                message << buffer;
+
+                // the flag text is appended only when it was formatted; previously the stale
+                // buffer was appended unconditionally, duplicating the x/y text
+                if( interactingWorkUnit ){
+                    (void) sprintf( buffer, " Flg=%d (-2=all) %u", flagInt, flags );
+                    message << buffer;
+                }
+                message << '\n';
+            }
+        }
+    }
+    (void) fprintf( stderr, "%s\n\n", message.str().c_str() );
+}
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuLJ14Softcore.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuLJ14Softcore.h
-#ifndef OPENMM_FREE_ENERGY_GPU_LJ14_SOFTCORE_
+#ifndef FREE_ENERGY_CUDA_TYPES_H
-#define OPENMM_FREE_ENERGY_GPU_LJ14_SOFTCORE_
+#define FREE_ENERGY_CUDA_TYPES_H
 /* -------------------------------------------------------------------------- *
 *                                   OpenMM                                   *
@@ -27,33 +27,65 @@
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
 * -------------------------------------------------------------------------- */
-#include "gputypes.h"
+#include <kernels/cudatypes.h>
-struct cudaFreeEnergySimulationNonbonded14 {
+#include <stdarg.h>
+#include <limits>
+#include <iostream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include <cufft.h>
+#include <builtin_types.h>
+#include <vector_functions.h>
-    unsigned int    LJ14s;                          // Number of Lennard Jones 1-4 interactions
+// Nonbonded treatment used by the free energy kernels; kCalculateCDLJObcGbsaSoftcoreForces1()
+// switches on this value to pick the kernel variant it launches.
+enum CudaFreeEnergyNonbondedMethod
-    unsigned int    LJ14_offset;                    // Offset to end of Lennard Jones 1-4 parameters
+{
-    CudaNonbondedMethod nonbondedMethod;            // How to handle nonbonded interactions
+    FREE_ENERGY_NO_CUTOFF,   // selects the N2 kernels
-    int4*           pLJ14ID;                        // Lennard Jones 1-4 atom and output buffer IDs
+    FREE_ENERGY_CUTOFF,      // selects the Cutoff kernels
-    float4*         pLJ14Parameter;                 // Lennard Jones 1-4 parameters
+    FREE_ENERGY_PERIODIC     // selects the Periodic kernels
 };
-// info related to nonbonded 1-4 softcore
+// Constants and device pointers used by the free energy kernels. The host copy lives in
+// _freeEnergyGpuContext::freeEnergySim and is copied to the device constant symbol feSimDev
+// with cudaMemcpyToSymbol().
+struct cudaFreeEnergyGmxSimulation {
-class GpuLJ14Softcore {
+    // Constants
-    public:
+    unsigned int   LJ14_count;                      // LJ 1-4 count (presumably the number of LJ 1-4 interactions -- confirm)
+    int4*          pLJ14ID;                         // LJ 1-4 particle ids
+    float4*        pLJ14Parameter;                  // LJ 1-4 parameters
-        GpuLJ14Softcore();
+    float           epsfac;                         // Epsilon factor for CDLJ calculations
-        ~GpuLJ14Softcore();
+    CudaFreeEnergyNonbondedMethod nonbondedMethod;  // How to handle nonbonded interactions
+    float           nonbondedCutoff;                // Cutoff distance for nonbonded interactions
+    float           nonbondedCutoffSqr;             // Square of the cutoff distance for nonbonded interactions
-        CUDAStream<int4>* psLJ14SoftcoreID;
+    float           periodicBoxSizeX;               // The X dimension of the periodic box
-        CUDAStream<float4>* psLJ14SoftcoreParameter;
+    float           periodicBoxSizeY;               // The Y dimension of the periodic box
-        cudaFreeEnergySimulationNonbonded14 feSim;
+    float           periodicBoxSizeZ;               // The Z dimension of the periodic box
-        int flipStrides(gpuContext gpu);
+    float           invPeriodicBoxSizeX;            // 1 over the X dimension of the periodic box
+    float           invPeriodicBoxSizeY;            // 1 over the Y dimension of the periodic box
+    float           invPeriodicBoxSizeZ;            // 1 over the Z dimension of the periodic box
+    float           recipBoxSizeX;                  // The X dimension of the reciprocal box for Ewald summation
+    float           recipBoxSizeY;                  // The Y dimension of the reciprocal box for Ewald summation
+    float           recipBoxSizeZ;                  // The Z dimension of the reciprocal box for Ewald summation
+    float           cellVolume;                     // NOTE(review): previously commented as "Ewald parameter alpha (a.k.a. kappa)"; the name suggests the periodic cell volume -- confirm
+    float           reactionFieldK;                 // Constant for reaction field correction
+    float           reactionFieldC;                 // Constant for reaction field correction
+    float4*         pSigEps4;                       // sigma, eps, lambda, charge
+    int             bornRadiiScalingMethod;         // flag for method to use scaling radii (0=none,1=quintic spline)
+    float           quinticLowerLimitFactor;        // lower limit factor for quintic spline
+    float           quinticUpperLimit;              // upper limit for quintic spline
+    float*          pSwitchDerivative;              // switch derivatives for quintic spline
+    float*          pNonPolarScalingFactors;        // non-polar scaling factors
-    private:
 };
-#endif // OPENMM_FREE_ENERGY_GPU_LJ14_SOFTCORE_
+#endif // FREE_ENERGY_CUDA_TYPES_H
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/GpuLJ14Softcore.cpp
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/GpuLJ14Softcore.cpp
-/* -------------------------------------------------------------------------- *
+#ifndef __FREE_ENERGY_GPUTYPES_H__
- *                                   OpenMM                                   *
+#define __FREE_ENERGY_GPUTYPES_H__
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
+/* -------------------------------------------------------------------------- *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
+ *                          OpenMMFreeEnergy                                      *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
+ * -------------------------------------------------------------------------- *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
+ * This is part of the OpenMM molecular simulation toolkit originating from   *
- *                                                                            *
+ * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
+ * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Authors: Scott Le Grand, Peter Eastman                                     *
+ * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- * Contributors:                                                              *
+ *                                                                            *
- *                                                                            *
+ * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * This program is free software: you can redistribute it and/or modify       *
+ * Authors: Scott Le Grand, Peter Eastman                                     *
- * it under the terms of the GNU Lesser General Public License as published   *
+ * Contributors:                                                              *
- * by the Free Software Foundation, either version 3 of the License, or       *
+ *                                                                            *
- * (at your option) any later version.                                        *
+ * This program is free software: you can redistribute it and/or modify       *
- *                                                                            *
+ * it under the terms of the GNU Lesser General Public License as published   *
- * This program is distributed in the hope that it will be useful,            *
+ * by the Free Software Foundation, either version 3 of the License, or       *
- * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
+ * (at your option) any later version.                                        *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
+ *                                                                            *
- * GNU Lesser General Public License for more details.                        *
+ * This program is distributed in the hope that it will be useful,            *
- *                                                                            *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
- * You should have received a copy of the GNU Lesser General Public License   *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
+ * GNU Lesser General Public License for more details.                        *
- * -------------------------------------------------------------------------- */
+ *                                                                            *
+ * You should have received a copy of the GNU Lesser General Public License   *
-#include "GpuLJ14Softcore.h"
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.      *
-#include "GpuFreeEnergyCudaKernels.h"
+ * -------------------------------------------------------------------------- */
-// GpuLJ14Softcore constructor
+#include "kernels/gputypes.h"
+#include "freeEnergyCudaTypes.h"
-GpuLJ14Softcore::GpuLJ14Softcore( ){
-    psLJ14SoftcoreID          = NULL;
+#include <map>
-    psLJ14SoftcoreParameter   = NULL;
+typedef std::map<int,float> MapIntFloat;
-}
+typedef MapIntFloat::const_iterator MapIntFloatCI;
-// GpuLJ14Softcore destructor
+// Host-side state of the free energy plugin: the wrapped base gpu context, the constants
+// copied to the device, and the CUDA streams that freeEnergyGpuShutDown() deletes.
+struct _freeEnergyGpuContext {
-GpuLJ14Softcore::~GpuLJ14Softcore( ){
+    // base gpu context; freeEnergyGpuShutDown() does not delete it
+    _gpuContext* gpuContext;
-    delete psLJ14SoftcoreID;
+    // host copy of the constants copied to the device symbol feSimDev
+    cudaFreeEnergyGmxSimulation freeEnergySim;
-    delete psLJ14SoftcoreParameter;
+    // presumably the per-particle exclusion lists -- confirm against the code that fills it
+    std::vector<std::vector<int> > exclusions;
-}
+    // streams deleted by freeEnergyGpuShutDown()
+    CUDAStream<float4>* psSigEps4;
-int GpuLJ14Softcore::flipStrides( gpuContext gpu ){
+    CUDAStream<int4>*   psLJ14ID;
-    int flip = gpu->sim.outputBuffers - 1;
+    CUDAStream<float4>* psLJ14Parameter;
-    for (unsigned int ii = 0; ii < psLJ14SoftcoreID->_stride; ii++)
+    CUDAStream<float>*  psSwitchDerivative;
-    {
+    CUDAStream<float>*  psNonPolarScalingFactors;
-        (*psLJ14SoftcoreID)[ii].z = flip - (*psLJ14SoftcoreID)[ii].z;
-        (*psLJ14SoftcoreID)[ii].w = flip - (*psLJ14SoftcoreID)[ii].w;
+    // optional log file; callers test it for NULL before writing
+    FILE* log;
-    }   
+};
-    psLJ14SoftcoreID->Upload();
+typedef struct _freeEnergyGpuContext *freeEnergyGpuContext;
-    return 0;
-}
+// Function prototypes
+extern "C" freeEnergyGpuContext freeEnergyGpuInit( _gpuContext* gpu );
+extern "C" void freeEnergyGpuShutDown(freeEnergyGpuContext gpu);
+extern "C" void freeEnergyGpuSetConstants(freeEnergyGpuContext gpu);
+#endif // __FREE_ENERGY_GPUTYPES_H__
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
@@ -33,6 +33,9 @@
 #include <cuda.h>
 #include <vector_functions.h>
 #include <cstdlib>
+#include <sstream>
+#define USE_SOFTCORE_LJ
 struct Atom {
    float x;
@@ -49,65 +52,18 @@ struct Atom {
    float fb;
 };
-struct cudaFreeEnergySimulation {
-    float* pParticleSoftCoreLJLambda;
-};
 static __constant__ cudaGmxSimulation cSim;
-static __constant__ cudaFreeEnergySimulation feSimDev;
+static __constant__ cudaFreeEnergyGmxSimulation feSimDev;
-void SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( gpuContext gpu )
-{
-    cudaError_t status;
-    //(void) fprintf( stderr, "SetCalculateCDLJObcGbsaSoftcoreGpu1Sim gpu=%p cSim=%p sizeof=%u\n", gpu, &gpu->sim, sizeof(cudaGmxSimulation) ); fflush( stderr );
+// Copy the simulation constants used by this file's kernels to its device constant symbols:
+// the base context's sim struct goes to cSim, the free energy struct goes to feSimDev.
+// Each copy is checked with RTERROR.
+void SetCalculateCDLJObcGbsaSoftcoreGpu1Sim( freeEnergyGpuContext freeEnergyGpu ){
-    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreGpu1Sim copy to cSim failed");
-}
-void SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim( float* gpuParticleSoftCoreLJLambda)
-{
    cudaError_t status;
-    //(void) fprintf( stderr, "SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim\n" );
+    // base simulation constants
+    status = cudaMemcpyToSymbol(cSim, &freeEnergyGpu->gpuContext->sim, sizeof(cudaGmxSimulation));
+    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreGpu1Sim copy to cSim failed");
-    struct cudaFreeEnergySimulation feSim;
-    feSim.pParticleSoftCoreLJLambda = gpuParticleSoftCoreLJLambda;
-    status = cudaMemcpyToSymbol(feSimDev, &feSim, sizeof(cudaFreeEnergySimulation));
-    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreSupplementary1Sim failed");
-}
-void GetCalculateCDLJObcGbsaSoftcoreForces1Sim( gpuContext gpu )
-{
-    cudaError_t status;
-    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
-}
-#if 0
+    // free energy specific constants
+    status = cudaMemcpyToSymbol( feSimDev, &freeEnergyGpu->freeEnergySim, sizeof(cudaFreeEnergyGmxSimulation));
-__device__ float fastErfc(float r)
+    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateCDLJObcGbsaSoftcoreGpu1Sim copy to feSimDev failed");
-{
-    float normalized = cSim.tabulatedErfcScale*r;
-    int index = (int) normalized;
-    float fract2 = normalized-index;
-    float fract1 = 1.0f-fract2;
-    return fract1*tex1Dfetch(tabulatedErfcRef, index) + fract2*tex1Dfetch(tabulatedErfcRef, index+1);
 }
-#endif
-// Include versions of the kernel for N^2 calculations.
-#if 0
-#define METHOD_NAME(a, b) a##N2##b
-#include "kCalculateCDLJObcGbsaForces1.h"
-#define USE_OUTPUT_BUFFER_PER_WARP
-#undef METHOD_NAME
-#define METHOD_NAME(a, b) a##N2ByWarp##b
-#include "kCalculateCDLJObcGbsaForces1.h"
-#endif
 // Include versions of the kernel for N^2 calculations with softcore LJ.
@@ -126,16 +82,15 @@ __device__ float fastErfc(float r)
 // Include versions of the kernel with cutoffs.
-#if 0
 #undef METHOD_NAME
 #undef USE_OUTPUT_BUFFER_PER_WARP
 #define USE_CUTOFF
 #define METHOD_NAME(a, b) a##Cutoff##b
-#include "kCalculateCDLJObcGbsaForces1.h"
+#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
 #define USE_OUTPUT_BUFFER_PER_WARP
 #undef METHOD_NAME
 #define METHOD_NAME(a, b) a##CutoffByWarp##b
-#include "kCalculateCDLJObcGbsaForces1.h"
+#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
 // Include versions of the kernel with periodic boundary conditions.
@@ -143,140 +98,137 @@ __device__ float fastErfc(float r)
 #undef USE_OUTPUT_BUFFER_PER_WARP
 #define USE_PERIODIC
 #define METHOD_NAME(a, b) a##Periodic##b
-#include "kCalculateCDLJObcGbsaForces1.h"
+#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
 #define USE_OUTPUT_BUFFER_PER_WARP
 #undef METHOD_NAME
 #define METHOD_NAME(a, b) a##PeriodicByWarp##b
-#include "kCalculateCDLJObcGbsaForces1.h"
+#include "kCalculateCDLJObcGbsaSoftcoreForces1.h"
-// Include versions of the kernels for Ewald
-#undef METHOD_NAME
-#undef USE_OUTPUT_BUFFER_PER_WARP
-#define USE_PERIODIC
-#define USE_EWALD
-#define METHOD_NAME(a, b) a##Ewald##b
-#include "kCalculateCDLJObcGbsaForces1.h"
-#define USE_OUTPUT_BUFFER_PER_WARP
-#undef METHOD_NAME
-#define METHOD_NAME(a, b) a##EwaldByWarp##b
-#include "kCalculateCDLJObcGbsaForces1.h"
-extern __global__ void kFindBlockBoundsCutoff_kernel();
-extern __global__ void kFindBlockBoundsPeriodic_kernel();
-extern __global__ void kFindBlocksWithInteractionsCutoff_kernel();
-extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
-extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*);
-extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
-extern __global__ void kCalculateEwaldFastCosSinSums_kernel();
-extern __global__ void kCalculateEwaldFastForces_kernel();
-extern void kCalculatePME(gpuContext gpu);
-#endif
 /**
 * 
 * Calculate Born radii and first GBSA loop forces/energy
 *
- * @param gpu     gpu contexct
+ * @param freeEnergyGpu  free energy gpu context; its wrapped gpuContext supplies the launch configuration
- * @param gbsaObc if set, calculate Born radii for OBC
- *                otherwise calculate Born radii for GB/VI
 *
 */
-void kCalculateCDLJObcGbsaSoftcoreForces1(gpuContext gpu )
+void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext freeEnergyGpu )
 {
 //    printf("kCalculateCDLJObcGbsaForces1\n");
-    switch (gpu->sim.nonbondedMethod)
+    gpuContext gpu = freeEnergyGpu->gpuContext;
+//fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 cutoff=%15.7e blks=%u thread/.block=%u nbMethod==%d warp=%u\n",
+//         gpu->sim.nonbondedCutoffSqr, gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, 
+//         freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp);
+//#define DEBUG
+#ifdef DEBUG 
+fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 cutoff=%15.7e\n", gpu->sim.nonbondedCutoffSqr );
+int psize = gpu->sim.paddedNumberOfAtoms;
+CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
+CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
+float bF,bR;
+float bF1,b2;
+float ratio;
+float atomicRadii;
+showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
+for( int ii = 0; ii < psize; ii++ ){
+pdE1->_pSysData[ii].x = 0.001f;
+pdE1->_pSysData[ii].y = 0.001f;
+pdE1->_pSysData[ii].z = 0.001f;
+pdE1->_pSysData[ii].w = 0.001f;
+pdE2->_pSysData[ii].x = 0.001f;
+pdE2->_pSysData[ii].y = 0.001f;
+pdE2->_pSysData[ii].z = 0.001f;
+pdE2->_pSysData[ii].w = 0.001f;
+}
+pdE1->Upload();
+pdE2->Upload();
+#endif
+    switch( freeEnergyGpu->freeEnergySim.nonbondedMethod )
    {
-        case NO_CUTOFF:
+        case FREE_ENERGY_NO_CUTOFF:
            // use softcore LJ potential
+#ifdef DEBUG
+            if (gpu->bOutputBufferPerWarp)
+                   kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                           sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit,pdE1->_pDevData, pdE2->_pDevData);
+            else
+                   kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                           sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
+#else   
            if (gpu->bOutputBufferPerWarp)
                   kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                           sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
+                           sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit );
            else
                   kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                           sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
+                           sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit );
+#endif
            LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreForces1");
            break;
-#if 0
-        case CUTOFF:
+        case FREE_ENERGY_CUTOFF:
-            kFindBlockBoundsCutoff_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
-            LAUNCHERROR("kFindBlockBoundsCutoff");
+#ifdef DEBUG
-            kFindBlocksWithInteractionsCutoff_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
-            LAUNCHERROR("kFindBlocksWithInteractionsCutoff");
-            compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
-            kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            if (gpu->bRecalculateBornRadii)
-            {
-                kCalculateObcGbsaBornSum(gpu);
-                kReduceObcGbsaBornSum(gpu);
-            }
            if (gpu->bOutputBufferPerWarp)
-                kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
+                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
            else
-                kCalculateCDLJObcGbsaCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
+                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            LAUNCHERROR("kCalculateCDLJObcGbsaCutoffForces1");
+#else
+            if (gpu->bOutputBufferPerWarp)
+                kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
+            else
+                kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
+#endif
            break;
-        case PERIODIC:
-            kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
+        case FREE_ENERGY_PERIODIC:
-            LAUNCHERROR("kFindBlockBoundsPeriodic");
-            kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
-            LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
-            compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
-            kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            if (gpu->bRecalculateBornRadii)
-            {
-                kCalculateObcGbsaBornSum(gpu);
-                kReduceObcGbsaBornSum(gpu);
-            }
            if (gpu->bOutputBufferPerWarp)
-                kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
            else
-                kCalculateCDLJObcGbsaPeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
+                kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
+            LAUNCHERROR("kCalculateCDLJObcGbsaSoftcorePeriodicForces1");
            break;
-        case EWALD:
-        case PARTICLE_MESH_EWALD:
-            kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
-            LAUNCHERROR("kFindBlockBoundsPeriodic");
-            kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
-            LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
-            compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
-            kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
-            if (gpu->bRecalculateBornRadii)
-            {
-                kCalculateObcGbsaBornSum(gpu);
-                kReduceObcGbsaBornSum(gpu);
-            }
-            cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
-            cudaBindTexture(NULL, &tabulatedErfcRef, gpu->psTabulatedErfc->_pDevData, &channelDesc, gpu->psTabulatedErfc->_length*sizeof(float));
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            else
-                kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-            LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
-            if (gpu->sim.nonbondedMethod == EWALD)
-            {
-                // Ewald summation
-                kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
-                LAUNCHERROR("kCalculateEwaldFastCosSinSums");
-                kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
-                LAUNCHERROR("kCalculateEwaldFastForces");
-            }
-            else
-                kCalculatePME(gpu);
-#endif
    }
+#ifdef DEBUG 
+pdE1->Download();
+pdE2->Download();
+gpu->psPosq4->Download();
+gpu->psGBVIData->Download();
+gpu->psBornRadii->Download();
+freeEnergyGpu->psSwitchDerivative->Download();
+fprintf( stderr, "PdeCud %d\n", TARGET );
+bF = 0.0;
+for( int ii = 0; ii < gpu->natoms; ii++ ){
+bF += pdE1->_pSysData[ii].x;
+if( fabs( pdE1->_pSysData[ii].w ) > 1.0e-03 ){
+fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e    %15.7e %15.7e %15.7e %15.7e\n", ii, 
+         pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
+         pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
+}
+}
+bR      = gpu->psBornRadii->_pSysData[TARGET];
+atomicRadii = gpu->psGBVIData->_pSysData[TARGET].x; 
+ratio   = (atomicRadii/bR);
+bF1     = bF + (3.0f*gpu->psGBVIData->_pSysData[TARGET].z*ratio*ratio*ratio)/bR; 
+b2      = bR*bR;
+bF1     *= (1.0f/3.0f)*b2*b2;
+fprintf( stderr, "sumbF Cud %6d %15.7e %15.7e %15.7e\n", TARGET, bF, bF1, bR);
+#endif
 }