Removed debug code

In Born radii calculation for particle i, removed factor of lambda associated w/ particle i

Removed debug code
In Born radii calculation for particle i, removed factor of lambda associated w/ particle i
032e18de · Mark Friedrichs · 076eb95e · 032e18de · 032e18de · 032e18de
Commit 032e18de authored Nov 10, 2011 by Mark Friedrichs
18 changed files
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.cu
@@ -38,7 +38,6 @@
 #include <sstream>
 #define USE_SOFTCORE_LJ
-//#define DEBUG
 struct Atom {
    float x;
@@ -134,52 +133,11 @@ void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext freeEnergyGpu )
    }   
    threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
-#ifdef DEBUG 
-fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 blks=%u thread/block=%u %u shMem=%u nbMethod==%d warp=%u\n",
-         gpu->sim.nonbond_blocks, threadsPerBlock, gpu->sim.nonbond_threads_per_block, sizeof(Atom)*threadsPerBlock,
-         freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp);
-int psize = gpu->sim.paddedNumberOfAtoms;
-CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
-CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
-float bF,bR;
-float bF1,b2;
-float ratio;
-float atomicRadii;
-showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
-for( int ii = 0; ii < psize; ii++ ){
-pdE1->_pSysData[ii].x = 0.0f;
-pdE1->_pSysData[ii].y = 0.001f;
-pdE1->_pSysData[ii].z = 0.001f;
-pdE1->_pSysData[ii].w = 0.001f;
-pdE2->_pSysData[ii].x = 0.0f;
-pdE2->_pSysData[ii].y = 0.001f;
-pdE2->_pSysData[ii].z = 0.001f;
-pdE2->_pSysData[ii].w = 0.001f;
-}
-pdE1->Upload();
-pdE2->Upload();
-#endif
    switch( freeEnergyGpu->freeEnergySim.nonbondedMethod )
    {
        case FREE_ENERGY_NO_CUTOFF:
            // use softcore LJ potential
-#ifdef DEBUG
-            (void) fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 ver=%u blks=%u threadsPerBlock=%u shMem=%u %u wrp=%u\n", gpu->sm_version,
-                            gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(Atom)*threadsPerBlock, gpu->sharedMemoryPerBlock, gpu->bOutputBufferPerWarp ); fflush( stderr );
-            if (gpu->bOutputBufferPerWarp)
-                   kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                           sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit,pdE1->_pDevData, pdE2->_pDevData);
-            else
-                   kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                           sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-#else   
            if (gpu->bOutputBufferPerWarp)
                   kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                           sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit );
@@ -187,33 +145,17 @@ pdE2->Upload();
                   kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                           sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit );
-#endif
            LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreForces1");
            break;
        case FREE_ENERGY_CUTOFF:
-#ifdef DEBUG
-            (void) fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreCutoffForces1 %6d blks=%u nonbond_threads_per_block=%5u shMem=%5u\n",
-                            gpu->natoms, gpu->sim.nonbond_blocks, threadsPerBlock, (sizeof(Atom)+sizeof(float))*threadsPerBlock);
-            (void) fflush( stderr );
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
            else
                kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
-#endif
            LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreCutoffForces1");
@@ -221,85 +163,15 @@ pdE2->Upload();
        case FREE_ENERGY_PERIODIC:
-#ifdef DEBUG
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
            else
                kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
-#endif
            LAUNCHERROR("kCalculateCDLJObcGbsaSoftcorePeriodicForces1");
            break;
    }
-#ifdef DEBUG 
-/*
-gpu->psBornForce->Download();
-freeEnergyGpu->psSwitchDerivative->Download();
-gpu->psBornRadii->Download();
-fprintf( stderr, "XX bR=%15.7e swd=%15.7e\n", gpu->psBornRadii->_pSysData[0], freeEnergyGpu->psSwitchDerivative->_pSysData[0] );
-for( int ii = 0; ii < gpu->sim.nonbondOutputBuffers; ii++ ){
-    fprintf( stderr, "strx %4d %15.7e %15.7e %15.7e %15.7e\n", ii, 
-             gpu->psBornForce->_pSysStream[ii][0],
-             gpu->psBornForce->_pSysStream[ii][1],
-             gpu->psBornForce->_pSysStream[ii][2],
-             gpu->psBornForce->_pSysStream[ii][3] );
-    if(  gpu->natoms > 1984 ){
-    int idx = 1983;
-    fprintf( stderr, "stry %4d %15.7e %15.7e %15.7e %15.7e %5d\n", ii, 
-             gpu->psBornForce->_pSysStream[ii][idx+0],
-             gpu->psBornForce->_pSysStream[ii][idx+1],
-             gpu->psBornForce->_pSysStream[ii][idx+2],
-             gpu->psBornForce->_pSysStream[ii][idx+3], idx );
-    }
-}
-int bufferI = 62;
-if(  bufferI < gpu->sim.nonbondOutputBuffers ){
-    fprintf( stderr, "BufferI %4d \n", bufferI );
-for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
-    fprintf( stderr, "strz %4d %15.7e\n",  ii, gpu->psBornForce->_pSysStream[bufferI][ii] );
-}
-}
-*/
-pdE1->Download();
-pdE2->Download();
-gpu->psPosq4->Download();
-gpu->psGBVIData->Download();
-gpu->psBornRadii->Download();
-gpu->psBornForce->Download();
-freeEnergyGpu->psSwitchDerivative->Download();
-fprintf( stderr, "PdeCud %d\n", TARGET );
-bF = 0.0;
-int count =0;
-for( int ii = 0; ii < psize; ii++ ){
-bF += pdE1->_pSysData[ii].x;
-if( fabs( pdE1->_pSysData[ii].w ) > 1.0e-03 && fabs( pdE1->_pSysData[ii].x ) > 0.0 ){
-count++;
-fprintf( stderr, "%4d %4d %15.7e %15.7e %15.7e %15.7e    %15.7e %15.7e %15.7e %15.7e bF=%15.7e swd=%15.7e\n", count, ii, 
-         pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
-         pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w, gpu->psBornForce->_pSysData[ii],
-         freeEnergyGpu->psSwitchDerivative->_pSysData[ii] );
-}
-}
-bR      = gpu->psBornRadii->_pSysData[TARGET];
-atomicRadii = gpu->psGBVIData->_pSysData[TARGET].x; 
-ratio   = (atomicRadii/bR);
-bF1     = bF + (3.0f*gpu->psGBVIData->_pSysData[TARGET].z*ratio*ratio*ratio)/bR; 
-b2      = bR*bR;
-bF1     *= (1.0f/3.0f)*b2*b2;
-fprintf( stderr, "sumbF Cud %6d count=%d %15.7e %15.7e %15.7e\n", TARGET, count, bF, bF1, bR);
-#endif
 }
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateCDLJObcGbsaSoftcoreForces1.h
@@ -36,9 +36,6 @@
 #include "kSoftcoreLJ.h"
 #endif
-#undef TARGET
-#define TARGET 1926
 __global__ 
 #if (__CUDA_ARCH__ >= 200)
 __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
@@ -57,15 +54,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
    unsigned int totalWarps        = gridDim.x*blockDim.x/GRID;
    unsigned int warp              = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
-    //unsigned int totalWarps        = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
-    //unsigned int warp              = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits      = cSim.pInteractionCount[0];
    unsigned int pos               = warp*numWorkUnits/totalWarps;
    unsigned int end               = (warp+1)*numWorkUnits/totalWarps;
    float CDLJObcGbsa_energy;
    float energy                   = 0.0f;
 #ifdef USE_CUTOFF
-    //float* tempBuffer              = (float*) &sA[cSim.nonbond_threads_per_block];
    float* tempBuffer              = (float*) &sA[blockDim.x];
 #endif
@@ -171,24 +165,6 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
                        dGpol_dalpha2_ij    = 0.0f;
                    }
                    af.w                   += dGpol_dalpha2_ij * psA[j].br;
-#ifdef DEBUG
-int jIdx = j;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = 1.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = -1.0f;
-}
-#endif
                    energy                 += 0.5f*CDLJObcGbsa_energy;
                    // Add Forces
@@ -278,22 +254,6 @@ pdE1[tjj].w = -1.0f;
                        dGpol_dalpha2_ij   = 0.0f;
                    }
-#ifdef DEBUG
-int jIdx    = j;
-if( i == TARGET ){
-int tjj     =  (y+jIdx);
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = 2.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = -2.0f;
-}
-#endif
                    af.w                  += dGpol_dalpha2_ij * psA[j].br;
                    energy                += 0.5f*CDLJObcGbsa_energy;
@@ -428,26 +388,6 @@ pdE1[tjj].w = -2.0f;
                        af.w                   += dGpol_dalpha2_ij * psA[tj].br;
                        energy                 += CDLJObcGbsa_energy;
-#ifdef DEBUG
-int jIdx = tj;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = 3.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x = dGpol_dalpha2_ij * br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = -3.0f;
-}
-#endif
                        // Add forces
                        dx                     *= dEdR;
@@ -545,24 +485,6 @@ pdE1[tjj].w = -3.0f;
                            if (tgx == 0)
                                psA[j].fb += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
-#ifdef DEBUG
-int jIdx = j;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[j].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = 4.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x = tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].w = -4.0f;
-} 
-#endif
                            energy                 += CDLJObcGbsa_energy;
                            // Add forces
@@ -678,26 +600,6 @@ pdE1[tjj].w = -4.0f;
                        dGpol_dalpha2_ij   = 0.0f;
                    }
-#ifdef DEBUG
-int jIdx = tj;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = dGpol_dalpha2_ij * psA[tj].br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].z = dGpol_dalpha2_ij;
-pdE1[tjj].w = 6.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x = dGpol_dalpha2_ij * br;
-pdE1[tjj].y = sqrt(r2);
-pdE1[tjj].z = dGpol_dalpha2_ij;
-pdE1[tjj].w = -6.0f;
-} 
-#endif
                    af.w                   += dGpol_dalpha2_ij * psA[tj].br;
                    psA[tj].fb             += dGpol_dalpha2_ij * br;
                    energy                 += CDLJObcGbsa_energy;

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.cu
@@ -39,7 +39,6 @@
 #define PARAMETER_PRINT 0
 #define MAX_PARAMETER_PRINT 10
-//#define DEBUG
 static __constant__ cudaGmxSimulation cSim;
 static __constant__ cudaFreeEnergyGmxSimulation gbviSimDev;
@@ -525,35 +524,6 @@ void kCalculateGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
    }   
    threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
-#ifdef DEBUG
-fprintf( stderr, "kCalculateGBVISoftcoreBornSum blks=%u threadsPerBlock=%u %u  shMem=%u\n",
-         gpu->sim.nonbond_blocks, threadsPerBlock, gpu->sim.nonbond_threads_per_block, (sizeof(Atom)+sizeof(float))*threadsPerBlock ); fflush( stderr );
-int psize = gpu->sim.paddedNumberOfAtoms;
-CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
-CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
-float bF; 
-float bF1; 
-showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
-for( int ii = 0; ii < psize; ii++ ){
-pdE1->_pSysData[ii].x = 0.0f;
-pdE1->_pSysData[ii].y = 0.001f;
-pdE1->_pSysData[ii].z = 0.001f;
-pdE1->_pSysData[ii].w = 0.001f;
-pdE2->_pSysData[ii].x = 0.001f;
-pdE2->_pSysData[ii].y = 0.001f;
-pdE2->_pSysData[ii].z = 0.001f;
-pdE2->_pSysData[ii].w = 0.001f;
-}
-pdE1->Upload();
-pdE2->Upload();
-#endif
    kClearGBVISoftcoreBornSum( gpu );
    LAUNCHERROR("kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum");
@@ -561,15 +531,6 @@ pdE2->Upload();
    {   
        case FREE_ENERGY_NO_CUTOFF:
-#ifdef DEBUG
-            if (gpu->bOutputBufferPerWarp){
-                kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            } else {
-                kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            }
-#else
            if (gpu->bOutputBufferPerWarp){
                kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
@@ -577,8 +538,6 @@ pdE2->Upload();
                kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
            }
-#endif
            break;
        case FREE_ENERGY_CUTOFF:
@@ -591,27 +550,13 @@ pdE2->Upload();
            kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                    sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
-#ifdef DEBUG
-            (void) fprintf( stderr, "kCalculateGBVISoftcoreBornSum cutoff=%15.7e warp=%u GridBoundingBox.length=%u interaction_blocks=%u interaction_threads_per_block=%u nonbond_blocks=%u nonbond_threads_per_block=%u\n",
-                            gpu->sim.nonbondedCutoffSqr, gpu->bOutputBufferPerWarp, gpu->psGridBoundingBox->_length, gpu->sim.interaction_blocks,
-                            gpu->sim.interaction_threads_per_block, gpu->sim.nonbond_blocks, threadsPerBlock ); fflush( stderr );
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
            else
                kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
-#endif
            break;
        case FREE_ENERGY_PERIODIC:
@@ -624,21 +569,13 @@ pdE2->Upload();
            kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                    sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
-#ifdef DEBUG
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData  );
-            else
-                kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
-                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData  );
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
            else
                kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
                        (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
-#endif
            break;
        default:
@@ -647,23 +584,4 @@ pdE2->Upload();
    }
    LAUNCHERROR("kCalculateGBVISoftcoreBornSum");
-#ifdef DEBUG
-pdE1->Download();
-pdE2->Download();
-fprintf( stderr, "bSum Cud method=%u warp=%u\n", freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp );
-bF  = 0.0;
-bF1 = 0.0;
-for( int ii = 0; ii < gpu->natoms; ii++ ){
-    if( fabsf( pdE1->_pSysData[ii].w ) > 0.002 ){
-        bF1 += pdE1->_pSysData[ii].x;
-        if( fabsf( pdE1->_pSysData[ii].x ) > 0.001 ){
-            fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e    %15.7e %15.7e %15.7e %15.7e\n", ii,
-                     pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
-                     pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
-        }
-    }
-    bF += pdE1->_pSysData[ii].x;
-}
-fprintf( stderr, "bSum Cud %6d %15.7e %15.7e\n", TARGET, bF, bF1 );
-#endif
 }
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreBornSum.h
@@ -37,9 +37,6 @@
 #include "kCalculateGBVIAux.h"
-#undef TARGET
-//#define TARGET 39
 __global__ 
 #if (__CUDA_ARCH__ >= 200)
 __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
@@ -48,11 +45,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
 #else
 __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
 #endif
-#ifdef DEBUG
-void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
-#else
 void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
-#endif
 {
    extern __shared__ Atom sA[];
@@ -119,28 +112,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
                {
                    bSum  += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr );
-#ifdef DEBUG
-int jIdx = j;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
-pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = 1.0f;
-pdE2[tjj].x = sqrt(r2);
-pdE2[tjj].y = psA[jIdx].sr;
-pdE2[tjj].z = ar.x;
-pdE2[tjj].w = 1.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x =  psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
-pdE1[tjj].y =  psA[jIdx].bornRadiusScaleFactor;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = -1.0f;
-} 
-#endif
                }
            }
@@ -214,32 +185,6 @@ pdE1[tjj].w = -1.0f;
                        apos.w                 += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr );
                        psA[tj].sum            += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
-#ifdef DEBUG
-int jIdx = tj;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[jIdx].sr );
-pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = 2.0f;
-float R =  ar.x;
-float S =  psA[tj].sr;
-pdE2[tjj].x = getGBVI_L( r, (r + S), S );
-pdE2[tjj].y = -getGBVI_L( r, (r - S), S );
-pdE2[tjj].z = -getGBVI_L( r, R, S );
-pdE2[tjj].w = (1.0f/(R*R*R));
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE1[tjj].x = ar.w*getGBVI_Volume( r, psA[jIdx].r, ar.y );
-pdE1[tjj].y = ar.w;
-pdE1[tjj].z = psA[jIdx].r;
-pdE1[tjj].w = -2.0f;
-}
-#endif
                    }
                    tj = (tj - 1) & (GRID - 1);
                }

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
@@ -68,66 +68,6 @@ void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu)
 #include "kCalculateGBVIAux.h"
-/**
- * This file contains the kernel for evalauating the second stage of GBSA.  It is included
- * several times in kCalculateGBVIForces2.cu with different #defines to generate
- * different versions of the kernels.
- */
-__global__ void kCalculateGBVISoftcoreForces2a_kernel()
-{
-    unsigned int pos  = (blockIdx.x * blockDim.x + threadIdx.x);
-    if( pos >=  cSim.atoms )return;
-    float4 apos                     = cSim.pPosq[pos];
-    float4 ar                       = cSim.pGBVIData[pos];
-    float fb                        = cSim.pBornForce[pos];
-    unsigned int posJ               = 0;
-    float4 force;
-    force.x = force.y = force.z = force.w = 0.0f;
-    while ( posJ < cSim.atoms )
-    {
-        float4 aposJ                = cSim.pPosq[posJ];
-        float4 arJ                  = cSim.pGBVIData[posJ];
-        float fbJ                   = cSim.pBornForce[posJ];
-        float dx                    = aposJ.x - apos.x;
-        float dy                    = aposJ.y - apos.y;
-        float dz                    = aposJ.z - apos.z;
-        float r2                    = dx * dx + dy * dy + dz * dz;
-        float r                     = sqrt(r2);
-        float dE                    = getGBVI_dE2( r, ar.x, arJ.y, fb );
-        dE                          = r > 1.0e-08f ? dE : 0.0f;
-//dx = dy = dz = 1.0f;
-        float d                     = dx*dE;
-        force.x                    -= d;
-        d                           = dy*dE;
-        force.y                    -= d;
-        d                           = dz*dE;
-        force.z                    -= d;
-#if 1
-        dE                          = getGBVI_dE2( r, arJ.x, ar.y, fbJ );
-        dE                          = r > 1.0e-08f ? dE : 0.0f;
-        d                           = dx*dE;
-        force.x                    -= d;
-        d                           = dy*dE;
-        force.y                    -= d;
-        d                           = dz*dE;
-        force.z                    -= d;
-#endif
-        posJ                       += 1;
-    }
-    // Write results
-    cSim.pForce4[pos]              = force;
-}
 // Include versions of the kernels for N^2 calculations.
 #define METHOD_NAME(a, b) a##N2##b
@@ -180,46 +120,6 @@ void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
    }
    threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
-#ifdef DEBUG
-    fprintf( stderr,"kCalculateGBVISoftcoreForces2 nonbondedMethod=%d bornForce2_blocks=%u threadsPerBlock=%u shMem=%u\n",
-             freeEnergyGpu->freeEnergySim.nonbondedMethod,
-             gpu->sim.bornForce2_blocks, threadsPerBlock, (sizeof(Atom)+sizeof(float3))*threadsPerBlock ); fflush( stderr );
-int psize = 64;
-CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
-CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
-for( int ii = 0; ii < 32; ii++ ){
-pdE1->_pSysData[ii].x = 0.0f;
-pdE1->_pSysData[ii].y = 0.0f;
-pdE1->_pSysData[ii].z = 0.0f;
-pdE1->_pSysData[ii].w = 0.0f;
-pdE2->_pSysData[ii].x = 0.0f;
-pdE2->_pSysData[ii].y = 0.0f;
-pdE2->_pSysData[ii].z = 0.0f;
-pdE2->_pSysData[ii].w = 0.0f;
-}
-pdE1->Upload();
-pdE2->Upload();
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateGBVISoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
-                        sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateGBVISoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
-                        sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
-pdE1->Download();
-pdE2->Download();
-fprintf( stderr, "Pde\n" );
-for( int ii = 0; ii < 32; ii++ ){
-fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e    %15.7e %15.7e %15.7e %15.7e\n", ii, 
-         pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
-         pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
-}
-            break;
-#endif
    switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
    {
        case FREE_ENERGY_NO_CUTOFF:

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
@@ -47,18 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
 #endif
 void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit )
 {
-//METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
    extern __shared__ Atom sA[];
    unsigned int totalWarps   = gridDim.x*blockDim.x/GRID;
    unsigned int warp         = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
-    //unsigned int totalWarps   = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
-    //unsigned int warp         = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits = cSim.pInteractionCount[0];
    unsigned int pos          = warp*numWorkUnits/totalWarps;
    unsigned int end          = (warp+1)*numWorkUnits/totalWarps;
 #ifdef USE_CUTOFF
-    //float3* tempBuffer        = (float3*) &sA[cSim.bornForce2_threads_per_block];
    float3* tempBuffer        = (float3*) &sA[blockDim.x];
 #endif
@@ -126,19 +122,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit
                    dE              = 0.0f;
                }
-/*
-if( i == TARGET ){
-    pdE1[x+j].x = dE;
-    pdE1[x+j].y = psA[j].bornRadiusScaleFactor;
-    pdE1[x+j].z = r;
-    pdE1[x+j].w = dE1;
-}
-if( (x+j) == TARGET ){
-    pdE2[i].x = dE;
-    pdE2[i].y = psA[j].bornRadiusScaleFactor;
-    pdE2[i].z = r;
-    pdE2[i].w = psA[j].sr-ar.x;
-}*/
                float d             = dx * dE;
                af.x               -= d;
                psA[j].fx          += d;

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateLocalSoftcoreForces.cu
@@ -29,7 +29,7 @@
 #include <cudatypes.h>
 #include "kSoftcoreLJ.h"
-#define PARAMETER_PRINT 1
+#define PARAMETER_PRINT 0
 #define MAX_PARAMETER_PRINT 10
 static __constant__ cudaGmxSimulation cSim;

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
@@ -117,9 +117,6 @@ void kCalculateCDLJSoftcoreForces( freeEnergyGpuContext freeEnergyGpu )
 {
    gpuContext gpu = freeEnergyGpu->gpuContext;
-    // (void) fprintf( stderr,"kCalculateCDLJCutoffForces %d warp=%u nonbond_blocks=%u nonbond_threads_per_block=%u rfK=%15.7e rfC=%15.7e\n", freeEnergyGpu->freeEnergySim.nonbondedMethod,
-    //                 gpu->bOutputBufferPerWarp, gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.reactionFieldK, gpu->sim.reactionFieldC); fflush( stderr );
    switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
    {
        case FREE_ENERGY_NO_CUTOFF:

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.cu
@@ -34,7 +34,6 @@
 #define PARAMETER_PRINT 0
 #define MAX_PARAMETER_PRINT 10
-//#define DEBUG
 struct Atom {
    float x;
@@ -211,7 +210,6 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel()
        // Now calculate Born radius and OBC term.
        sum                    *= 0.5f * atom.x;
-        sum                    *= gbsaSimDev.pNonPolarScalingFactors[pos];
        float sum2              = sum * sum;
        float sum3              = sum * sum2;
        float tanhSum           = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3);
@@ -377,31 +375,6 @@ void kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
  //  printf("kCalculateObcGbsaSoftcoreBornSum\n");
    gpuContext gpu = freeEnergyGpu->gpuContext;
-#ifdef DEBUG 
-fprintf( stderr, "kCalculateObcGbsaSoftcoreBornSum cutoff=%15.7e\n", gpu->sim.nonbondedCutoffSqr );
-int psize = gpu->sim.paddedNumberOfAtoms;
-CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
-CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
-float bF; 
-float bF1; 
-showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
-for( int ii = 0; ii < psize; ii++ ){
-pdE1->_pSysData[ii].x = 0.0f;
-pdE1->_pSysData[ii].y = 0.001f;
-pdE1->_pSysData[ii].z = 0.001f;
-pdE1->_pSysData[ii].w = 0.001f;
-pdE2->_pSysData[ii].x = 0.001f;
-pdE2->_pSysData[ii].y = 0.001f;
-pdE2->_pSysData[ii].z = 0.001f;
-pdE2->_pSysData[ii].w = 0.001f;
-}
-pdE1->Upload();
-pdE2->Upload();
-#endif
    kClearObcGbsaSoftcoreBornSum(gpu);
    LAUNCHERROR("kClearBornSum from kCalculateObcGbsaSoftcoreBornSum");
@@ -409,22 +382,13 @@ pdE2->Upload();
    {
        case FREE_ENERGY_NO_CUTOFF:
-#ifdef DEBUG 
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit,  pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit,  pdE1->_pDevData, pdE2->_pDevData);
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
            else
                kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
-#endif
            break;
        case FREE_ENERGY_CUTOFF:
@@ -437,14 +401,6 @@ pdE2->Upload();
            kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-#ifdef DEBUG
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
@@ -452,7 +408,6 @@ pdE2->Upload();
            else
                kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-#endif
            break;
@@ -466,16 +421,6 @@ pdE2->Upload();
            kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                    sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-#ifdef DEBUG
-            if (gpu->bOutputBufferPerWarp)
-                kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-            else
-                kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
-                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
-#else
            if (gpu->bOutputBufferPerWarp)
                kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
@@ -483,7 +428,7 @@ pdE2->Upload();
                kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
                        (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
-#endif
            break;
        default:
@@ -492,23 +437,4 @@ pdE2->Upload();
    }
    LAUNCHERROR("kCalculateObcGbsaSoftcoreBornSum");
-#ifdef DEBUG 
-pdE1->Download();
-pdE2->Download();
-//gpu->psBornRadii->Download();
-//gpu->psObcData->Download();
-fprintf( stderr, "bL Obc Cud\n" );
-bF  = 0.0;
-bF1 = 0.0;
-for( int ii = 0; ii < gpu->natoms; ii++ ){
-    bF1 += pdE1->_pSysData[ii].x;
-            fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e    %15.7e %15.7e %15.7e %15.7e\n", ii,
-                     pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
-                     pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
-    bF += pdE1->_pSysData[ii].x;
-}
-fprintf( stderr, "bS Obc Cud %6d %15.7e %15.7e\n", TARGET, bF, bF1 );
-#endif
 }
--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreBornSum.h
@@ -41,11 +41,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
 #else
 __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
 #endif
-#ifdef DEBUG
-void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2)
-#else
 void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit)
-#endif
 {
    extern __shared__ Atom sA[];
    unsigned int totalWarps   = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
@@ -135,32 +131,6 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUn
                            term += 2.0f * ((1.0f / ar.x) - l_ij);
                        }
                        apos.w += psA[j].polarScaleData*term;
-#ifdef DEBUG
-int jIdx = j;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = term;
-pdE1[tjj].y = r;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = 1.0f;
-pdE2[tjj].x = r;
-pdE2[tjj].y = l_ij;
-pdE2[tjj].z = rj;
-pdE2[tjj].w = 1.0f;
-}
-/*
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE2[tjj].x = sum;
-pdE2[tjj].y = psA[jIdx].polarScaleData;
-pdE2[tjj].z = ar.x;
-pdE2[tjj].w = -1.0f;
-} */
-#endif
                    }
                }
            }
@@ -254,37 +224,6 @@ pdE2[tjj].w = -1.0f;
                            }
                            apos.w        += (scale*term);
-#ifdef DEBUG
-int jIdx = tj;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = term;
-pdE1[tjj].y = r;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = 2.0f;
-/*
-pdE2[tjj].x = r;
-pdE2[tjj].y = l_ij;
-pdE2[tjj].z = rj;
-pdE2[tjj].w = 2.0f;
-*/
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-/*
-pdE1[tjj].x = term;
-pdE1[tjj].y = r;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = -2.0f;
-*/
-pdE2[tjj].x = term;
-pdE2[tjj].y = r;
-pdE2[tjj].z = ar.x;
-pdE2[tjj].w = -2.0f;
-}
-#endif
                        }
                        float rScaledRadiusI    = r + ar.y;
                        if (psA[tj].r < rScaledRadiusI)
@@ -307,25 +246,6 @@ pdE2[tjj].w = -2.0f;
                            }
                            psA[tj].sum    += polarScaleDataI*term;
-#ifdef DEBUG
-int jIdx = tj;
-if( i == TARGET ){
-int tjj     = y+jIdx;
-pdE1[tjj].x = term;
-pdE1[tjj].y = r;
-pdE1[tjj].z = ar.x;
-pdE1[tjj].w = 3.0f;
-}
-if( (y+jIdx) == TARGET ){
-int tjj     = i;
-pdE2[tjj].x = term;
-pdE2[tjj].y = r;
-pdE2[tjj].z = ar.x;
-pdE2[tjj].w = -3.0f;
-}
-#endif
                        }
                    }
                    tj = (tj - 1) & (GRID - 1);

--- a/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
+++ b/plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
@@ -44,14 +44,10 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
    unsigned int totalWarps   = gridDim.x*blockDim.x/GRID;
    unsigned int warp         = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
-    //unsigned int totalWarps   = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
-    //unsigned int warp         = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits = cSim.pInteractionCount[0];
    unsigned int pos          = warp*numWorkUnits/totalWarps;
    unsigned int end          = (warp+1)*numWorkUnits/totalWarps;
 #ifdef USE_CUTOFF
-    //float3* tempBuffer        = (float3*) &sA[cSim.bornForce2_threads_per_block];
    float3* tempBuffer        = (float3*) &sA[blockDim.x];
 #endif
@@ -124,7 +120,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
                // Born Forces term
                float term              =  0.125f * (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3 +
                                           0.250f * t1 * r2Inverse;
-                term                   *= psA[j].npScale*nonPolarScaleDataI;
+                term                   *= psA[j].npScale;
                float dE                = fb * term;
 #if defined USE_CUTOFF
@@ -231,7 +227,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
                    float term              =  0.125f *
                                              (1.000f + psA[tj].sr * psA[tj].sr * r2Inverse) * t3J +
                                               0.250f * t1J * r2Inverse;
-                    term                   *= psA[tj].npScale*nonPolarScaleDataI;
+                    term                   *= psA[tj].npScale;
                    float dE                = fb * term;
 #if defined USE_CUTOFF
@@ -257,7 +253,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
                    term                    =  0.125f *
                                              (1.000f + sr2 * r2Inverse) * t3I +
                                               0.250f * t1I * r2Inverse;
-                    term                   *= psA[tj].npScale*nonPolarScaleDataI;
+                    term                   *= nonPolarScaleDataI;
                    dE                      = psA[tj].fb * term;
                    float rj = psA[tj].r;
@@ -293,11 +289,11 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
                        float dx                = psA[j].x - apos.x;
                        float dy                = psA[j].y - apos.y;
                        float dz                = psA[j].z - apos.z;
-    #ifdef USE_PERIODIC
+#ifdef USE_PERIODIC
-                        dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
+                        dx                     -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
-                        dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
+                        dy                     -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
-                        dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
+                        dz                     -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
-    #endif
+#endif
                        float r2                = dx * dx + dy * dy + dz * dz;
                        float r                 = sqrt(r2);
@@ -327,16 +323,16 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
                        float term              =  0.125f *
                                                  (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3J +
                                                   0.250f * t1J * r2Inverse;
-                        term                   *= psA[j].npScale*nonPolarScaleDataI;
+                        term                   *= psA[j].npScale;
                        float dE                = fb * term;
-    #if defined USE_PERIODIC
+#if defined USE_PERIODIC
                        if (a.x >= rScaledRadiusJ || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
-    #elif defined USE_CUTOFF
+#elif defined USE_CUTOFF
                        if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr)
-    #else
+#else
                        if (a.x >= rScaledRadiusJ)
-    #endif
+#endif
                        {
                            dE                  = 0.0f;
                        }
@@ -355,17 +351,17 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
                        term                    =  0.125f *
                                                  (1.000f + sr2 * r2Inverse) * t3I +
                                                   0.250f * t1I * r2Inverse;
-                        term                   *= psA[j].npScale*nonPolarScaleDataI;
+                        term                   *= nonPolarScaleDataI;
                        dE                      = psA[j].fb * term;
                        float rj = psA[j].r;
-    #ifdef USE_PERIODIC
+#ifdef USE_PERIODIC
                        if (rj >= rScaledRadiusI || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
-    #elif defined USE_CUTOFF
+#elif defined USE_CUTOFF
                        if (rj >= rScaledRadiusI || r2 > cSim.nonbondedCutoffSqr)
-    #else
+#else
                        if (rj >= rScaledRadiusI)
-    #endif
+#endif
                        {
                            dE                  = 0.0f;
                        }

--- a/plugins/freeEnergy/platforms/cuda/tests/CMakeLists.txt
+++ b/plugins/freeEnergy/platforms/cuda/tests/CMakeLists.txt
@@ -15,14 +15,12 @@ SET( INCLUDE_SERIALIZATION FALSE )
 #SET( INCLUDE_SERIALIZATION TRUE )
 SET( SHARED_OPENMM_TARGET OpenMMFreeEnergy)
-SET( STATIC_OPENMM_TARGET OpenMMFreeEnergy_static)
 SET( SHARED_CUDA_TARGET OpenMMCuda)
-SET( STATIC_CUDA_TARGET OpenMMCuda_static)
 IF( INCLUDE_SERIALIZATION )
    INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include)
-    SET( SHARED_OPENMM_SERIALIZATION OpenMMSerialization )
+    SET( SHARED_OPENMM_SERIALIZATION "OpenMMSerialization" )
-    SET( SHARED_FREE_ENERGY_SERIALIZATION FreeEnergySerialization )
+    SET( SHARED_FREE_ENERGY_SERIALIZATION "FreeEnergySerialization" )
 ENDIF( INCLUDE_SERIALIZATION )
 IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
@@ -32,8 +30,6 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
        SET(SHARED_OPENMM_SERIALIZATION ${SHARED_OPENMM_SERIALIZATION}_d)
        SET(SHARED_FREE_ENERGY_SERIALIZATION ${SHARED_FREE_ENERGY_SERIALIZATION}_d)
    ENDIF( INCLUDE_SERIALIZATION )
-    SET(STATIC_CUDA_TARGET ${STATIC_CUDA_TARGET}_d)
-    SET(STATIC_OPENMM_TARGET ${STATIC_OPENMM_TARGET}_d)
 ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
 # Automatically create tests using files named "Test*.cpp"
@@ -54,13 +50,52 @@ FOREACH(TEST_PROG ${TEST_PROGS})
        SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE")
    ENDIF( INCLUDE_SERIALIZATION )
-    IF( ${TEST_ROOT} STREQUAL "TestCudaOBCSoftcoreForce" )
+    IF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce2" )
-        SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
-    ENDIF( ${TEST_ROOT} STREQUAL "TestCudaOBCSoftcoreForce" )
+        # serialize
+        SET(DEFINE_STRING "-DTEST_PLATFORM=0 ")
+        IF( INCLUDE_SERIALIZATION )
+            SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE ")
+            SET(TARGET_LINK_LIBRARIES_STRING "${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION}")
+        ELSE( INCLUDE_SERIALIZATION )
+            SET(TARGET_LINK_LIBRARIES_STRING "${SHARED_TARGET}")
+        ENDIF( INCLUDE_SERIALIZATION )
+        # obc
+        SET(OBC_DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
+        SET(OBC_TEST "TestCudaGBSAOBCSoftcoreForce2")
+        CUDA_ADD_EXECUTABLE(${OBC_TEST} ${TEST_PROG})
+        IF( INCLUDE_SERIALIZATION )
+            TARGET_LINK_LIBRARIES(${OBC_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
+        ELSE( INCLUDE_SERIALIZATION )
+            TARGET_LINK_LIBRARIES(${OBC_TEST} ${SHARED_TARGET})
+        ENDIF( INCLUDE_SERIALIZATION )
+        SET_TARGET_PROPERTIES(${OBC_TEST} PROPERTIES COMPILE_FLAGS ${OBC_DEFINE_STRING} )
+        ADD_TEST(${OBC_TEST} ${EXECUTABLE_OUTPUT_PATH}/${OBC_TEST})
+        # nonbond
+        SET(NONBOND_DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=0")
+        SET(NONBOND_TEST "TestCudaNonbondSoftcoreForce2")
+        CUDA_ADD_EXECUTABLE(${NONBOND_TEST} ${TEST_PROG})
+        IF( INCLUDE_SERIALIZATION )
+            TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
+        ELSE( INCLUDE_SERIALIZATION )
+            TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET})
+        ENDIF( INCLUDE_SERIALIZATION )
+        SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES COMPILE_FLAGS ${NONBOND_DEFINE_STRING} )
+        ADD_TEST(${NONBOND_TEST} ${EXECUTABLE_OUTPUT_PATH}/${NONBOND_TEST})
+        # gbvi
-    IF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce" )
        SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=2")
-    ENDIF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce" )
+        SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} )
+    ENDIF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce2" )
    #MESSAGE( "${TEST_ROOT} ${DEFINE_STRING}" )
    SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} )

--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaGBVISoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaGBVISoftcoreForce.cpp
--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaOBCSoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaOBCSoftcoreForce.cpp
--- a/plugins/freeEnergy/platforms/reference/src/gbsa/CpuObcSoftcore.cpp
+++ b/plugins/freeEnergy/platforms/reference/src/gbsa/CpuObcSoftcore.cpp
@@ -221,7 +221,7 @@ void CpuObcSoftcore::computeBornRadii( const vector<RealVec>& atomCoordinates,
        // OBC-specific code (Eqs. 6-8 in paper)
-        sum                  *= nonPolarScaleFactors[atomI]*half*offsetRadiusI;
+        sum                  *= half*offsetRadiusI;
        RealOpenMM sum2       = sum*sum;
        RealOpenMM sum3       = sum*sum2;
        RealOpenMM tanhSum    = TANH( alphaObc*sum - betaObc*sum2 + gammaObc*sum3 );
@@ -486,7 +486,7 @@ RealOpenMM CpuObcSoftcore::computeBornEnergyForces( vector<RealVec>& atomCoordin
                RealOpenMM r2Inverse     = rInverse*rInverse;
                RealOpenMM t3            = eighth*(one + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + fourth*LN( u_ij/l_ij )*r2Inverse;
-                           t3           *= nonPolarScaleFactors[atomI]*nonPolarScaleFactors[atomJ];
+                           t3           *= nonPolarScaleFactors[atomJ];
                RealOpenMM de            = bornForces[atomI]*t3*rInverse;

--- a/plugins/freeEnergy/platforms/reference/tests/TestReferenceGBVISoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/reference/tests/TestReferenceGBVISoftcoreForce.cpp
--- a/plugins/freeEnergy/platforms/reference/tests/TestReferenceOBCSoftcoreForce.cpp
+++ b/plugins/freeEnergy/platforms/reference/tests/TestReferenceOBCSoftcoreForce.cpp
@@ -77,7 +77,7 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
    custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric());
    custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric());
-    custom->addComputedValue("I", "lambda1*lambda2*step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);"
+    custom->addComputedValue("I", "lambda2*step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);"
                                  "U=r+sr2;"
                                  "C=2*(1/or1-1/L)*step(sr2-r-or1);"
                                  "L=max(or1, D);"
@@ -86,8 +86,8 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
                                  "or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions);
    custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);"
                                  "psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle);
-    custom->addEnergyTerm("lambda*28.3919551*(radius+0.14)^2*(radius/B)^6-lambda*lambda*0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
+    custom->addEnergyTerm("lambda*28.3919551*(radius+0.14)^2*(radius/B)^6-0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
-    custom->addEnergyTerm("-138.935485*lambda1*lambda2*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
+    custom->addEnergyTerm("-138.935485*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
                          "f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions);
    vector<Vec3> positions(numParticles);
@@ -103,14 +103,14 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
            obc->addParticle( charge*lambda1, 0.2, 0.5,  lambda1);
            obc->addParticle(-charge*lambda1, 0.1, 0.5, lambda1);
-            params[0] = charge;
+            params[0] = charge*lambda1;
            params[1] = 0.2;
            params[2] = 0.5;
            params[3] = lambda1;
            custom->addParticle(params);
-            params[0] = -charge;
+            params[0] = -charge*lambda1;
            params[1] = 0.1;
            custom->addParticle(params);
@@ -119,13 +119,13 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
            obc->addParticle( charge*lambda2, 0.2, 0.8, lambda2);
            obc->addParticle(-charge*lambda2, 0.1, 0.8, lambda2);
-            params[0] = charge;
+            params[0] = charge*lambda2;
            params[1] = 0.2;
            params[2] = 0.8;
            params[3] = lambda2;
            custom->addParticle(params);
-            params[0] = -charge;
+            params[0] = -charge*lambda2;
            params[1] = 0.1;
            custom->addParticle(params);
        }

--- a/plugins/freeEnergy/platforms/cuda/tests/TestCudaSoftcoreForce.h
+++ b/plugins/freeEnergy/platforms/cuda/tests/TestCudaSoftcoreForce.h
@@ -59,10 +59,7 @@
 #include <iostream>
 #include <vector>
 #include <algorithm>
-#include <iostream>
 #include <cstdio>
-#include <vector>
 #include <typeinfo>
 extern "C" void registerFreeEnergyCudaKernelFactories();
@@ -1322,29 +1319,27 @@ static NonbondedForce* copyNonbondedForce( const NonbondedForce& nonbondedForce
 *
 */
-static System* copySystem( const System& inputSystem ){
+static void copySystem( const System& inputSystem, System& systemCopy ){
-    System* systemCopy = new System();
    for( unsigned int ii = 0; ii < inputSystem.getNumParticles(); ii++ ){
-        systemCopy->addParticle( inputSystem.getParticleMass( static_cast<int>(ii) ) );
+        systemCopy.addParticle( inputSystem.getParticleMass( static_cast<int>(ii) ) );
    }
    Vec3 a;
    Vec3 b;
    Vec3 c;
    inputSystem.getDefaultPeriodicBoxVectors( a, b, c );
-    systemCopy->setDefaultPeriodicBoxVectors( a, b, c );
+    systemCopy.setDefaultPeriodicBoxVectors( a, b, c );
    for( unsigned int ii = 0; ii < inputSystem.getNumConstraints(); ii++ ){
        int index;
        int particle1, particle2;
        double distance;
        inputSystem.getConstraintParameters( ii, particle1, particle2, distance);
-        systemCopy->addConstraint( particle1, particle2, distance);
+        systemCopy.addConstraint( particle1, particle2, distance);
    }
-    return systemCopy;
+    return;
 }
 /** 
@@ -1809,8 +1804,9 @@ void runSystemComparisonTest( System& system1, System& system2,
        (void) fprintf( log, "System1: particles=%d forces=%d    System2: particles=%d forces=%d\n",
                        system1.getNumParticles(), system1.getNumForces(),
                        system2.getNumParticles(), system2.getNumForces() );
-        (void) fprintf( log, "Positions=%u\n",
+        (void) fprintf( log, "Positions=%u\n", static_cast<unsigned int>(positions.size()) );
-                        static_cast<unsigned int>(positions.size()) );
+        (void) fprintf( log, "Platform1=%s Platform2=%s\n", platform1.c_str(), platform2.c_str() );
+        (void) fprintf( log, "relativeTolerance=%8.2e applyAssert=%d\n", relativeTolerance, applyAssert );
        MapStringInt stringForceVector1;
        MapStringInt stringForceVector2;
@@ -1836,7 +1832,7 @@ void runSystemComparisonTest( System& system1, System& system2,
    if( system1.getNumParticles() != static_cast<int>(positions.size()) ){
        std::stringstream msg;
-        msg << "Number of partciles for system des not equal size of position array: " << system1.getNumParticles() << " != " << positions.size();
+        msg << "Number of particles for system does not equal size of position array: " << system1.getNumParticles() << " != " << positions.size();
        throw OpenMMException( msg.str() );
    }
@@ -1844,7 +1840,7 @@ void runSystemComparisonTest( System& system1, System& system2,
    context1.setPositions(positions);
    State state1 = context1.getState(State::Forces | State::Energy);
-    Context context2( system2, integrator2, Platform::getPlatformByName( "Reference"));
+    Context context2( system2, integrator2, Platform::getPlatformByName( platform2 ));
    context2.setPositions(positions);
    State state2 = context2.getState(State::Forces | State::Energy);