Extended Free energy plugin to allow cutoffs; cleaned up code and added tests

bc85b9f0 · Mark Friedrichs · d4441c15 · bc85b9f0 · d4441c15 · bc85b9f0
Commit bc85b9f0 authored Oct 24, 2011 by Mark Friedrichs
20 changed files
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreAux.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreAux.h
-/* -------------------------------------------------------------------------- *
- *                                   OpenMM                                   *
- * -------------------------------------------------------------------------- *
- * This is part of the OpenMM molecular simulation toolkit originating from   *
- * Simbios, the NIH National Center for Physics-Based Simulation of           *
- * Biological Structures at Stanford, funded under the NIH Roadmap for        *
- * Medical Research, grant U54 GM072970. See https://simtk.org.               *
- *                                                                            *
- * Portions copyright (c) 2009 Stanford University and the Authors.           *
- * Authors: Mark Friedrichs                                                   *
- * Contributors:                                                              *
- *                                                                            *
- * Permission is hereby granted, free of charge, to any person obtaining a    *
- * copy of this software and associated documentation files (the "Software"), *
- * to deal in the Software without restriction, including without limitation  *
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,   *
- * and/or sell copies of the Software, and to permit persons to whom the      *
- * Software is furnished to do so, subject to the following conditions:       *
- *                                                                            *
- * The above copyright notice and this permission notice shall be included in *
- * all copies or substantial portions of the Software.                        *
- *                                                                            *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,   *
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL    *
- * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,    *
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR      *
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE  *
- * USE OR OTHER DEALINGS IN THE SOFTWARE.                                     *
- * -------------------------------------------------------------------------- */
-#ifndef __Gpu_GBVI_SOFTCORE_AUX_H__
-#define __Gpu_GBVI_SOFTCORE_AUX_H__
-/**
- * This file contains subroutines used in evaluating quantities associated w/ the GB/VI function
- */
-__device__ float getGBVI_L( float r, float x, float S )
-{
-   float rInv   = 1.0f/r;
-   float xInv   = 1.0f/x;
-   float xInv2  = xInv*xInv;
-   float diff2  = (r + S)*(r - S);
-   return (1.5f*xInv2)*( (0.25f*rInv) - (xInv/3.0f) + (0.125f*diff2*xInv2*rInv) );
-}
-__device__ float getGBVI_Volume( float r_ij, float R, float S )
-{
-     float upperBound        = r_ij + S; 
-     float rdiffS            = r_ij - S; 
-     float lowerBound        = R > rdiffS ? R : rdiffS;
-     float L_upper           = getGBVI_L( r_ij, upperBound, S );
-     float L_lower           = getGBVI_L( r_ij, lowerBound, S );
-     float mask              = r_ij < (R - S) ? 0.0f : 1.0f;  
-     float addOn             = r_ij < (S - R) ? (1.0f/(R*R*R)) : 0.0f;  
-     return (mask*( L_upper - L_lower ) + addOn);
-}
-__device__ float getGBVI_dL_dr( float r, float x, float S )
-{
-   float rInv   = 1.0f/r;
-   float rInv2  = rInv*rInv;
-   float xInv   = 1.0f/x;
-   float xInv2  = xInv*xInv;
-   float xInv3  = xInv2*xInv;
-   float diff2  = (r + S)*(r - S);
-   return ( (-1.5f*xInv2*rInv2)*( 0.25f + 0.125f*diff2*xInv2 ) + 0.375f*xInv3*xInv );
-   //return 0.0f;
-}
-__device__ float getGBVI_dL_dx( float r, float x, float S )
-{
-   float rInv   = 1.0f/r;
-   float xInv   = 1.0f/x;
-   float xInv2  = xInv*xInv;
-   float xInv3  = xInv2*xInv;
-   float diff   = (r + S)*(r - S);
-   return ( (-1.5f*xInv3)*( (0.5f*rInv) - xInv + (0.5f*diff*xInv2*rInv) ));
-}
-__device__ float getGBVI_dE2( float r, float R, float S, float bornForce )
-{
-    float diff              = S - R;
-    float absDiff           = fabsf( S - R );
-    float dE                = getGBVI_dL_dr( r, r+S, S ) + getGBVI_dL_dx( r, r+S, S );
-    float mask;
-    float lowerBound;
-    if( (R > (r - S)) && (absDiff < r) ){
-        mask       = 0.0f;
-        lowerBound = R;
-    } else {
-        mask       = 1.0f;
-        lowerBound = (r - S);  
-    }   
-    dE                    -= getGBVI_dL_dr( r, lowerBound, S ) + mask*getGBVI_dL_dx( r, lowerBound, S );
-    dE                     = (absDiff >= r) && r >= diff ? 0.0f : dE; 
-    dE                    *= ( (r > 1.0e-08f) ? (bornForce/r) : 0.0f);
-    return (-dE);
-}
-__device__ float getGBVIBornForce2( float bornRadius, float R, float bornForce, float gamma )
-{ 
-    float ratio                     = (R/bornRadius);
-    float returnBornForce           = bornForce + (3.0f*gamma*ratio*ratio*ratio)/bornRadius; // 'cavity' term
-    float br2                       = bornRadius*bornRadius;
-          returnBornForce          *= (1.0f/3.0f)*br2*br2;
-   return returnBornForce;
-}
-#endif // __Gpu_GBVI_SOFTCORE_AUX_H__
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
@@ -37,7 +37,22 @@
 #include "kCalculateGBVIAux.h"
-__global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
+#undef TARGET
+//#define TARGET 5443
+__global__ 
+#if (__CUDA_ARCH__ >= 200)
+__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
+#elif (__CUDA_ARCH__ >= 120)
+__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
+#else
+__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
+#endif
+#ifdef DEBUG
+void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
+#else
+void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
+#endif
 {
    extern __shared__ Atom sA[];
@@ -47,10 +62,8 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
    unsigned int pos          = warp*numWorkUnits/totalWarps;
    unsigned int end          = (warp+1)*numWorkUnits/totalWarps;
-//    int end = workUnits / gridDim.x;
-//    int pos = end - (threadIdx.x >> GRIDBITS) - 1;
 #ifdef USE_CUTOFF
-    float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
+    float* tempBuffer         = (float*) &sA[cSim.nonbond_threads_per_block];
 #endif
    while ( pos < end )
@@ -85,68 +98,93 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
            sA[threadIdx.x].r                       = ar.x;
            sA[threadIdx.x].sr                      = ar.y;
            sA[threadIdx.x].bornRadiusScaleFactor   = ar.w;
-            apos.w                                  = 0.0f;
+            float bSum                              = 0.0f;
-            for (unsigned int j             = 0; j < GRID; j++)
+            for (unsigned int j = 0; j < GRID; j++)
            {
                dx                                  = psA[j].x - apos.x;
                dy                                  = psA[j].y - apos.y;
                dz                                  = psA[j].z - apos.z;
 #ifdef USE_PERIODIC
-                dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
+                dx                                 -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
-                dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
+                dy                                 -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
-                dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
+                dz                                 -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
 #endif
-                r2                      = dx * dx + dy * dy + dz * dz;
+                r2                                  = dx * dx + dy * dy + dz * dz;
-#if defined USE_PERIODIC
+#if defined USE_CUTOFF
-                if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
+                if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr && j != tgx)
-#elif defined USE_CUTOFF
+#else
-                if (r2 < cSim.nonbondedCutoffSqr)
+                if (i < cSim.atoms && x+j < cSim.atoms && j != tgx )
 #endif
                {
-                    r                       = sqrt(r2);
+                    bSum  += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr );
-                    if ((j != tgx) )
-                    {
+#ifdef DEBUG
-                        apos.w             += psA[j].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[j].sr );
+int jIdx = j;
-                    }
+if( i == TARGET ){
+int tjj     = y+jIdx;
+pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
+pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
+pdE1[tjj].z = ar.x;
+pdE1[tjj].w = 1.0f;
+pdE2[tjj].x = sqrt(r2);
+pdE2[tjj].y = psA[jIdx].sr;
+pdE2[tjj].z = ar.x;
+pdE2[tjj].w = 1.0f;
+}
+if( (y+jIdx) == TARGET ){
+int tjj     = i;
+pdE1[tjj].x =  psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
+pdE1[tjj].y =  psA[jIdx].bornRadiusScaleFactor;
+pdE1[tjj].z = ar.x;
+pdE1[tjj].w = -1.0f;
+} 
+#endif
                }
            }
            // Write results
 #ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = x + tgx + warp*cSim.stride;
-            cSim.pBornSum[offset] += apos.w;
+            cSim.pBornSum[offset] += bSum;
 #else
            unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
-            cSim.pBornSum[offset] = apos.w;
+            cSim.pBornSum[offset] = bSum;
 #endif
-        }
-        else        // 100% utilization
-        {
+        } else {
            // Read fixed atom data into registers and GRF
            unsigned int j                              = y + tgx;
            unsigned int i                              = x + tgx;
            float4 temp                                 = cSim.pPosq[j];
            float4 temp1                                = cSim.pGBVIData[j];
            float4 apos                                 = cSim.pPosq[i];        // Local atom x, y, z, sum
            float4 ar                                   = cSim.pGBVIData[i];    // Local atom vr, sr
            sA[threadIdx.x].x                           = temp.x;
            sA[threadIdx.x].y                           = temp.y;
            sA[threadIdx.x].z                           = temp.z;
            sA[threadIdx.x].r                           = temp1.x;
            sA[threadIdx.x].sr                          = temp1.y;
            sA[threadIdx.x].bornRadiusScaleFactor       = temp1.w;
-            sA[threadIdx.x].sum             = apos.w    = 0.0f;
+            sA[threadIdx.x].sum                         = 0.0f;
+            apos.w                                      = 0.0f;
 #ifdef USE_CUTOFF
-            //unsigned int flags = cSim.pInteractionFlag[pos + (blockIdx.x*workUnits)/gridDim.x];
            unsigned int flags = cSim.pInteractionFlag[pos];
            if (flags == 0)
            {
                // No interactions in this block.
            }
            else if (flags == 0xFFFFFFFF)
+            //else if (flags )
 #endif
            {
                // Compute all interactions within this block.
@@ -157,15 +195,15 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
                    dy                      = psA[tj].y - apos.y;
                    dz                      = psA[tj].z - apos.z;
 #ifdef USE_PERIODIC
-                    dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
+                    dx                     -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
-                    dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
+                    dy                     -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
-                    dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
+                    dz                     -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
 #endif
                    r2                      = dx * dx + dy * dy + dz * dz;
-#ifdef USE_PERIODIC
+#ifdef USE_CUTOFF
                    if (i < cSim.atoms && y+tj < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
-#elif defined USE_CUTOFF
+#else
-                    if (r2 < cSim.nonbondedCutoffSqr)
+                    if (i < cSim.atoms && y+tj < cSim.atoms )
 #endif
                    {
                        r                       = sqrt(r2);
@@ -173,11 +211,39 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
                        // psA[tj].sr = Sj
                        // ar.x       = Ri
-                        apos.w                 += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x,      psA[tj].sr );
+                        apos.w                 += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr );
                        psA[tj].sum            += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
+#ifdef DEBUG
+int jIdx = tj;
+if( i == TARGET ){
+int tjj     = y+jIdx;
+pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[jIdx].sr );
+pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
+pdE1[tjj].z = ar.x;
+pdE1[tjj].w = 2.0f;
+float R =  ar.x;
+float S =  psA[tj].sr;
+pdE2[tjj].x = getGBVI_L( r, (r + S), S );
+pdE2[tjj].y = -getGBVI_L( r, (r - S), S );
+pdE2[tjj].z = -getGBVI_L( r, R, S );
+pdE2[tjj].w = (1.0f/(R*R*R));
+}
+if( (y+jIdx) == TARGET ){
+int tjj     = i;
+pdE1[tjj].x = ar.w*getGBVI_Volume( r, psA[jIdx].r, ar.y );
+pdE1[tjj].y = ar.w;
+pdE1[tjj].z = psA[jIdx].r;
+pdE1[tjj].w = -2.0f;
+}
+#endif
                    }
                    tj = (tj - 1) & (GRID - 1);
                }
            }
 #ifdef USE_CUTOFF
            else
@@ -193,19 +259,20 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
                        dy                      = psA[j].y - apos.y;
                        dz                      = psA[j].z - apos.z;
 #ifdef USE_PERIODIC
-                        dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
+                        dx                     -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
-                        dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
+                        dy                     -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
-                        dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
+                        dz                     -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
 #endif
                        r2                      = dx * dx + dy * dy + dz * dz;
-#ifdef USE_PERIODIC
+#ifdef USE_CUTOFF
                        if (i < cSim.atoms && y+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
-#elif defined USE_CUTOFF
+#else
-                        if (r2 < cSim.nonbondedCutoffSqr)
+                        if (i < cSim.atoms && y+j < cSim.atoms)
 #endif
                        {
                            r                       = sqrt(r2);
-                            tempBuffer[threadIdx.x] = ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
+                            tempBuffer[threadIdx.x] = ar.w*getGBVI_Volume( r, psA[j].r, ar.y );
+                            apos.w                 += psA[j].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[j].sr );
                        }
                        // Sum the terms.
@@ -226,6 +293,7 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
 #endif
            // Write results
 #ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = x + tgx + warp*cSim.stride;
            cSim.pBornSum[offset] += apos.w;
@@ -237,6 +305,7 @@ __global__ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int
            offset = y + tgx + (x >> GRIDBITS) * cSim.stride;
            cSim.pBornSum[offset] = sA[threadIdx.x].sum;
 #endif
        }
        pos++;

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.h
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.cu
@@ -34,7 +34,7 @@
 using namespace std;
 #include "gputypes.h"
-#include "GpuObcGbsaSoftcore.h"
+#include "freeEnergyGpuTypes.h"
 struct Atom {
    float x;
@@ -49,38 +49,19 @@ struct Atom {
    float fb;
 };
-struct cudaFreeEnergySimulationObcGbsaSoftcore {
-    float* pNonPolarScalingFactors;
-};
-struct cudaFreeEnergySimulationObcGbsaSoftcore gbsaSimObc2;
 static __constant__ cudaGmxSimulation cSim;
-static __constant__ cudaFreeEnergySimulationObcGbsaSoftcore gbsaSimDev;
+static __constant__ cudaFreeEnergyGmxSimulation feSimDev;
 extern "C"
-void SetCalculateObcGbsaSoftcoreForces2Sim(gpuContext gpu)
+void SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpuContext freeEnergyGpu )
 {
    cudaError_t status;
-    status = cudaMemcpyToSymbol(cSim, &gpu->sim, sizeof(cudaGmxSimulation));     
+    status = cudaMemcpyToSymbol(cSim, &freeEnergyGpu->gpuContext->sim, sizeof(cudaGmxSimulation));
-    RTERROR(status, "cudaMemcpyToSymbol: SetSim copy to cSim failed");
+    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreForces2Sim copy to cSim failed");
-}
-extern "C" 
+    status = cudaMemcpyToSymbol( feSimDev, &freeEnergyGpu->freeEnergySim, sizeof(cudaFreeEnergyGmxSimulation));
-void SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim( float* nonPolarScalingFactors )
+    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreForces2Sim copy to feSimDev failed");
-{
-    cudaError_t status;
-    gbsaSimObc2.pNonPolarScalingFactors = nonPolarScalingFactors;
-    status                              = cudaMemcpyToSymbol(gbsaSimDev, &gbsaSimObc2, sizeof(cudaFreeEnergySimulationObcGbsaSoftcore));
-    RTERROR(status, "cudaMemcpyToSymbol: SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim");
-    //(void) fprintf( stderr, "In SetCalculateObcGbsaSoftcoreNonPolarScalingFactorsObc2Sim\n" );
-}
-void GetCalculateObcGbsaSoftcoreForces2Sim(gpuContext gpu)
-{
-    cudaError_t status;
-    status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));     
-    RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
 }
 // Include versions of the kernels for N^2 calculations.
@@ -116,15 +97,14 @@ void GetCalculateObcGbsaSoftcoreForces2Sim(gpuContext gpu)
 #define METHOD_NAME(a, b) a##PeriodicByWarp##b
 #include "kCalculateObcGbsaSoftcoreForces2.h"
-void kCalculateObcGbsaSoftcoreForces2(gpuContext gpu)
+void kCalculateObcGbsaSoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
 {
    //printf("kCalculateObcGbsaSoftcoreForces2\n");
-    //fprintf( stderr, "kCalculateObcGbsaSoftcoreForces2 nonbondedMethod=%d warp=%d\n", gpu->sim.nonbondedMethod, gpu->bOutputBufferPerWarp);
+    gpuContext gpu                     = freeEnergyGpu->gpuContext;
-//fprintf( stderr, "kCalculateObcGbsaSoftcoreForces2 nonbondedMethod=%d calling kReduceForces\n", gpu->sim.nonbondedMethod);
+    switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
-//kReduceForces(gpu);
-    switch (gpu->sim.nonbondedMethod)
    {
-        case NO_CUTOFF:
+        case FREE_ENERGY_NO_CUTOFF:
            if (gpu->bOutputBufferPerWarp)
                kCalculateObcGbsaSoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
                        sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
@@ -132,7 +112,9 @@ void kCalculateObcGbsaSoftcoreForces2(gpuContext gpu)
                kCalculateObcGbsaSoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
                        sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
            break;
-        case CUTOFF:
+        case FREE_ENERGY_CUTOFF:
            if (gpu->bOutputBufferPerWarp)
                kCalculateObcGbsaSoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
                        (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
@@ -140,7 +122,9 @@ void kCalculateObcGbsaSoftcoreForces2(gpuContext gpu)
                kCalculateObcGbsaSoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
                        (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
            break;
-        case PERIODIC:
+        case FREE_ENERGY_PERIODIC:
            if (gpu->bOutputBufferPerWarp)
                kCalculateObcGbsaSoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
                        (sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kSoftcoreLJ.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kSoftcoreLJ.h
@@ -35,14 +35,14 @@
 static __device__ float getSoftCoreLJ( float r2, float sig, float  eps, float lambdaI, float lambdaJ, float* energy)
 {
-   float r                         = sqrt(r2);
   float lambda                    = lambdaI < lambdaJ ? lambdaI : lambdaJ;
   eps                            *= lambda;
    // (r/sig)
-    float sig2                     = r/sig;
+    float sig2                     = 1.0f/sig;
          sig2                    *= sig2;
+          sig2                    *= r2;
    float sig6                     = sig2*sig2*sig2;
    float softcoreLJTerm           = 0.5f*( 1.0f -  lambda) + sig6;
@@ -53,6 +53,27 @@ static __device__ float getSoftCoreLJ( float r2, float sig, float  eps, float la
    return eps*softcoreLJInv2*( 12.0f*softcoreLJInv - 6.0f )*sig6;
 }
+static __device__ float getSoftCoreLJMod( float sigInvR, float  eps, float lambdaI, float lambdaJ, float* energy)
+{
+   float lambda                    = lambdaI < lambdaJ ? lambdaI : lambdaJ;
+   eps                            *= lambda;
+    // (r/sig)
+    float sig2                     = sigInvR*sigInvR;
+    float sig6                     = sig2*sig2*sig2;
+    float softcoreLJTerm           = 0.5f*( 1.0f -  lambda) + sig6;
+    float softcoreLJInv            = 1.0f/softcoreLJTerm;
+    float softcoreLJInv2           = softcoreLJInv*softcoreLJInv;
+    *energy                        = eps*(softcoreLJInv2 - softcoreLJInv);
+    return eps*softcoreLJInv2*( 12.0f*softcoreLJInv - 6.0f )*sig6;
+}
 #endif
 #endif
--- a/plugins/freeEnergy/platforms/cuda/tests/CMakeLists.txt
+++ b/plugins/freeEnergy/platforms/cuda/tests/CMakeLists.txt
--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaGBVISoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaGBVISoftcoreForce.cpp
--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaLJSoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaLJSoftcoreForce.cpp
--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaOBCSoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaOBCSoftcoreForce.cpp
--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaSoftcoreForce.h
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaSoftcoreForce.h
--- a/plugins/freeEnergy/platforms/cuda/tests/TstFreeEnergyCudaUsingParameterFile.cpp
+++ b/plugins/freeEnergy/platforms/cuda/tests/TstFreeEnergyCudaUsingParameterFile.cpp