Commit 032e18de authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Removed debug code

In Born radii calculation for particle i, removed factor of lambda associated w/ particle i
parent 076eb95e
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
#include <sstream> #include <sstream>
#define USE_SOFTCORE_LJ #define USE_SOFTCORE_LJ
//#define DEBUG
struct Atom { struct Atom {
float x; float x;
...@@ -134,52 +133,11 @@ void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext freeEnergyGpu ) ...@@ -134,52 +133,11 @@ void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext freeEnergyGpu )
} }
threadsPerBlock = threadsPerBlockPerMethod[methodIndex]; threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
#ifdef DEBUG
fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 blks=%u thread/block=%u %u shMem=%u nbMethod==%d warp=%u\n",
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->sim.nonbond_threads_per_block, sizeof(Atom)*threadsPerBlock,
freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp);
int psize = gpu->sim.paddedNumberOfAtoms;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
float bF,bR;
float bF1,b2;
float ratio;
float atomicRadii;
showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
for( int ii = 0; ii < psize; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.001f;
pdE1->_pSysData[ii].z = 0.001f;
pdE1->_pSysData[ii].w = 0.001f;
pdE2->_pSysData[ii].x = 0.0f;
pdE2->_pSysData[ii].y = 0.001f;
pdE2->_pSysData[ii].z = 0.001f;
pdE2->_pSysData[ii].w = 0.001f;
}
pdE1->Upload();
pdE2->Upload();
#endif
switch( freeEnergyGpu->freeEnergySim.nonbondedMethod ) switch( freeEnergyGpu->freeEnergySim.nonbondedMethod )
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
// use softcore LJ potential // use softcore LJ potential
#ifdef DEBUG
(void) fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 ver=%u blks=%u threadsPerBlock=%u shMem=%u %u wrp=%u\n", gpu->sm_version,
gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(Atom)*threadsPerBlock, gpu->sharedMemoryPerBlock, gpu->bOutputBufferPerWarp ); fflush( stderr );
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit,pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit ); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit );
...@@ -187,33 +145,17 @@ pdE2->Upload(); ...@@ -187,33 +145,17 @@ pdE2->Upload();
kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit ); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit );
#endif
LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreForces1");
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
#ifdef DEBUG
(void) fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreCutoffForces1 %6d blks=%u nonbond_threads_per_block=%5u shMem=%5u\n",
gpu->natoms, gpu->sim.nonbond_blocks, threadsPerBlock, (sizeof(Atom)+sizeof(float))*threadsPerBlock);
(void) fflush( stderr );
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
else else
kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
#endif
LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreCutoffForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreCutoffForces1");
...@@ -221,85 +163,15 @@ pdE2->Upload(); ...@@ -221,85 +163,15 @@ pdE2->Upload();
case FREE_ENERGY_PERIODIC: case FREE_ENERGY_PERIODIC:
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
#endif
LAUNCHERROR("kCalculateCDLJObcGbsaSoftcorePeriodicForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaSoftcorePeriodicForces1");
break; break;
} }
#ifdef DEBUG
/*
gpu->psBornForce->Download();
freeEnergyGpu->psSwitchDerivative->Download();
gpu->psBornRadii->Download();
fprintf( stderr, "XX bR=%15.7e swd=%15.7e\n", gpu->psBornRadii->_pSysData[0], freeEnergyGpu->psSwitchDerivative->_pSysData[0] );
for( int ii = 0; ii < gpu->sim.nonbondOutputBuffers; ii++ ){
fprintf( stderr, "strx %4d %15.7e %15.7e %15.7e %15.7e\n", ii,
gpu->psBornForce->_pSysStream[ii][0],
gpu->psBornForce->_pSysStream[ii][1],
gpu->psBornForce->_pSysStream[ii][2],
gpu->psBornForce->_pSysStream[ii][3] );
if( gpu->natoms > 1984 ){
int idx = 1983;
fprintf( stderr, "stry %4d %15.7e %15.7e %15.7e %15.7e %5d\n", ii,
gpu->psBornForce->_pSysStream[ii][idx+0],
gpu->psBornForce->_pSysStream[ii][idx+1],
gpu->psBornForce->_pSysStream[ii][idx+2],
gpu->psBornForce->_pSysStream[ii][idx+3], idx );
}
}
int bufferI = 62;
if( bufferI < gpu->sim.nonbondOutputBuffers ){
fprintf( stderr, "BufferI %4d \n", bufferI );
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
fprintf( stderr, "strz %4d %15.7e\n", ii, gpu->psBornForce->_pSysStream[bufferI][ii] );
}
}
*/
pdE1->Download();
pdE2->Download();
gpu->psPosq4->Download();
gpu->psGBVIData->Download();
gpu->psBornRadii->Download();
gpu->psBornForce->Download();
freeEnergyGpu->psSwitchDerivative->Download();
fprintf( stderr, "PdeCud %d\n", TARGET );
bF = 0.0;
int count =0;
for( int ii = 0; ii < psize; ii++ ){
bF += pdE1->_pSysData[ii].x;
if( fabs( pdE1->_pSysData[ii].w ) > 1.0e-03 && fabs( pdE1->_pSysData[ii].x ) > 0.0 ){
count++;
fprintf( stderr, "%4d %4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e bF=%15.7e swd=%15.7e\n", count, ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w, gpu->psBornForce->_pSysData[ii],
freeEnergyGpu->psSwitchDerivative->_pSysData[ii] );
}
}
bR = gpu->psBornRadii->_pSysData[TARGET];
atomicRadii = gpu->psGBVIData->_pSysData[TARGET].x;
ratio = (atomicRadii/bR);
bF1 = bF + (3.0f*gpu->psGBVIData->_pSysData[TARGET].z*ratio*ratio*ratio)/bR;
b2 = bR*bR;
bF1 *= (1.0f/3.0f)*b2*b2;
fprintf( stderr, "sumbF Cud %6d count=%d %15.7e %15.7e %15.7e\n", TARGET, count, bF, bF1, bR);
#endif
} }
...@@ -36,9 +36,6 @@ ...@@ -36,9 +36,6 @@
#include "kSoftcoreLJ.h" #include "kSoftcoreLJ.h"
#endif #endif
#undef TARGET
#define TARGET 1926
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
...@@ -57,15 +54,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -57,15 +54,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
float CDLJObcGbsa_energy; float CDLJObcGbsa_energy;
float energy = 0.0f; float energy = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
//float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
float* tempBuffer = (float*) &sA[blockDim.x]; float* tempBuffer = (float*) &sA[blockDim.x];
#endif #endif
...@@ -171,24 +165,6 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -171,24 +165,6 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
dGpol_dalpha2_ij = 0.0f; dGpol_dalpha2_ij = 0.0f;
} }
af.w += dGpol_dalpha2_ij * psA[j].br; af.w += dGpol_dalpha2_ij * psA[j].br;
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 1.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -1.0f;
}
#endif
energy += 0.5f*CDLJObcGbsa_energy; energy += 0.5f*CDLJObcGbsa_energy;
// Add Forces // Add Forces
...@@ -278,22 +254,6 @@ pdE1[tjj].w = -1.0f; ...@@ -278,22 +254,6 @@ pdE1[tjj].w = -1.0f;
dGpol_dalpha2_ij = 0.0f; dGpol_dalpha2_ij = 0.0f;
} }
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = (y+jIdx);
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 2.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -2.0f;
}
#endif
af.w += dGpol_dalpha2_ij * psA[j].br; af.w += dGpol_dalpha2_ij * psA[j].br;
energy += 0.5f*CDLJObcGbsa_energy; energy += 0.5f*CDLJObcGbsa_energy;
...@@ -428,26 +388,6 @@ pdE1[tjj].w = -2.0f; ...@@ -428,26 +388,6 @@ pdE1[tjj].w = -2.0f;
af.w += dGpol_dalpha2_ij * psA[tj].br; af.w += dGpol_dalpha2_ij * psA[tj].br;
energy += CDLJObcGbsa_energy; energy += CDLJObcGbsa_energy;
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 3.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -3.0f;
}
#endif
// Add forces // Add forces
dx *= dEdR; dx *= dEdR;
...@@ -545,24 +485,6 @@ pdE1[tjj].w = -3.0f; ...@@ -545,24 +485,6 @@ pdE1[tjj].w = -3.0f;
if (tgx == 0) if (tgx == 0)
psA[j].fb += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16]; psA[j].fb += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[j].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 4.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -4.0f;
}
#endif
energy += CDLJObcGbsa_energy; energy += CDLJObcGbsa_energy;
// Add forces // Add forces
...@@ -678,26 +600,6 @@ pdE1[tjj].w = -4.0f; ...@@ -678,26 +600,6 @@ pdE1[tjj].w = -4.0f;
dGpol_dalpha2_ij = 0.0f; dGpol_dalpha2_ij = 0.0f;
} }
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[tj].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].z = dGpol_dalpha2_ij;
pdE1[tjj].w = 6.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].z = dGpol_dalpha2_ij;
pdE1[tjj].w = -6.0f;
}
#endif
af.w += dGpol_dalpha2_ij * psA[tj].br; af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br; psA[tj].fb += dGpol_dalpha2_ij * br;
energy += CDLJObcGbsa_energy; energy += CDLJObcGbsa_energy;
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
#define PARAMETER_PRINT 0 #define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10 #define MAX_PARAMETER_PRINT 10
//#define DEBUG
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaFreeEnergyGmxSimulation gbviSimDev; static __constant__ cudaFreeEnergyGmxSimulation gbviSimDev;
...@@ -525,35 +524,6 @@ void kCalculateGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu ) ...@@ -525,35 +524,6 @@ void kCalculateGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
} }
threadsPerBlock = threadsPerBlockPerMethod[methodIndex]; threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
#ifdef DEBUG
fprintf( stderr, "kCalculateGBVISoftcoreBornSum blks=%u threadsPerBlock=%u %u shMem=%u\n",
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->sim.nonbond_threads_per_block, (sizeof(Atom)+sizeof(float))*threadsPerBlock ); fflush( stderr );
int psize = gpu->sim.paddedNumberOfAtoms;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
float bF;
float bF1;
showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
for( int ii = 0; ii < psize; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.001f;
pdE1->_pSysData[ii].z = 0.001f;
pdE1->_pSysData[ii].w = 0.001f;
pdE2->_pSysData[ii].x = 0.001f;
pdE2->_pSysData[ii].y = 0.001f;
pdE2->_pSysData[ii].z = 0.001f;
pdE2->_pSysData[ii].w = 0.001f;
}
pdE1->Upload();
pdE2->Upload();
#endif
kClearGBVISoftcoreBornSum( gpu ); kClearGBVISoftcoreBornSum( gpu );
LAUNCHERROR("kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum"); LAUNCHERROR("kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum");
...@@ -561,15 +531,6 @@ pdE2->Upload(); ...@@ -561,15 +531,6 @@ pdE2->Upload();
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp){
kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
} else {
kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
}
#else
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
...@@ -577,8 +538,6 @@ pdE2->Upload(); ...@@ -577,8 +538,6 @@ pdE2->Upload();
kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
} }
#endif
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
...@@ -591,27 +550,13 @@ pdE2->Upload(); ...@@ -591,27 +550,13 @@ pdE2->Upload();
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
(void) fprintf( stderr, "kCalculateGBVISoftcoreBornSum cutoff=%15.7e warp=%u GridBoundingBox.length=%u interaction_blocks=%u interaction_threads_per_block=%u nonbond_blocks=%u nonbond_threads_per_block=%u\n",
gpu->sim.nonbondedCutoffSqr, gpu->bOutputBufferPerWarp, gpu->psGridBoundingBox->_length, gpu->sim.interaction_blocks,
gpu->sim.interaction_threads_per_block, gpu->sim.nonbond_blocks, threadsPerBlock ); fflush( stderr );
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
#endif
break; break;
case FREE_ENERGY_PERIODIC: case FREE_ENERGY_PERIODIC:
...@@ -624,21 +569,13 @@ pdE2->Upload(); ...@@ -624,21 +569,13 @@ pdE2->Upload();
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
else
kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
else else
kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
#endif
break; break;
default: default:
...@@ -647,23 +584,4 @@ pdE2->Upload(); ...@@ -647,23 +584,4 @@ pdE2->Upload();
} }
LAUNCHERROR("kCalculateGBVISoftcoreBornSum"); LAUNCHERROR("kCalculateGBVISoftcoreBornSum");
#ifdef DEBUG
pdE1->Download();
pdE2->Download();
fprintf( stderr, "bSum Cud method=%u warp=%u\n", freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp );
bF = 0.0;
bF1 = 0.0;
for( int ii = 0; ii < gpu->natoms; ii++ ){
if( fabsf( pdE1->_pSysData[ii].w ) > 0.002 ){
bF1 += pdE1->_pSysData[ii].x;
if( fabsf( pdE1->_pSysData[ii].x ) > 0.001 ){
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
}
}
bF += pdE1->_pSysData[ii].x;
}
fprintf( stderr, "bSum Cud %6d %15.7e %15.7e\n", TARGET, bF, bF1 );
#endif
} }
...@@ -37,9 +37,6 @@ ...@@ -37,9 +37,6 @@
#include "kCalculateGBVIAux.h" #include "kCalculateGBVIAux.h"
#undef TARGET
//#define TARGET 39
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
...@@ -48,11 +45,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) ...@@ -48,11 +45,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif #endif
#ifdef DEBUG
void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
#else
void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit) void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
#endif
{ {
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
...@@ -119,28 +112,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit) ...@@ -119,28 +112,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
{ {
bSum += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr ); bSum += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr );
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 1.0f;
pdE2[tjj].x = sqrt(r2);
pdE2[tjj].y = psA[jIdx].sr;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = 1.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = -1.0f;
}
#endif
} }
} }
...@@ -214,32 +185,6 @@ pdE1[tjj].w = -1.0f; ...@@ -214,32 +185,6 @@ pdE1[tjj].w = -1.0f;
apos.w += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr ); apos.w += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr );
psA[tj].sum += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y ); psA[tj].sum += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 2.0f;
float R = ar.x;
float S = psA[tj].sr;
pdE2[tjj].x = getGBVI_L( r, (r + S), S );
pdE2[tjj].y = -getGBVI_L( r, (r - S), S );
pdE2[tjj].z = -getGBVI_L( r, R, S );
pdE2[tjj].w = (1.0f/(R*R*R));
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = ar.w*getGBVI_Volume( r, psA[jIdx].r, ar.y );
pdE1[tjj].y = ar.w;
pdE1[tjj].z = psA[jIdx].r;
pdE1[tjj].w = -2.0f;
}
#endif
} }
tj = (tj - 1) & (GRID - 1); tj = (tj - 1) & (GRID - 1);
} }
......
...@@ -68,66 +68,6 @@ void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu) ...@@ -68,66 +68,6 @@ void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu)
#include "kCalculateGBVIAux.h" #include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void kCalculateGBVISoftcoreForces2a_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
if( pos >= cSim.atoms )return;
float4 apos = cSim.pPosq[pos];
float4 ar = cSim.pGBVIData[pos];
float fb = cSim.pBornForce[pos];
unsigned int posJ = 0;
float4 force;
force.x = force.y = force.z = force.w = 0.0f;
while ( posJ < cSim.atoms )
{
float4 aposJ = cSim.pPosq[posJ];
float4 arJ = cSim.pGBVIData[posJ];
float fbJ = cSim.pBornForce[posJ];
float dx = aposJ.x - apos.x;
float dy = aposJ.y - apos.y;
float dz = aposJ.z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float dE = getGBVI_dE2( r, ar.x, arJ.y, fb );
dE = r > 1.0e-08f ? dE : 0.0f;
//dx = dy = dz = 1.0f;
float d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#if 1
dE = getGBVI_dE2( r, arJ.x, ar.y, fbJ );
dE = r > 1.0e-08f ? dE : 0.0f;
d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#endif
posJ += 1;
}
// Write results
cSim.pForce4[pos] = force;
}
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b #define METHOD_NAME(a, b) a##N2##b
...@@ -180,46 +120,6 @@ void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext freeEnergyGpu ) ...@@ -180,46 +120,6 @@ void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
} }
threadsPerBlock = threadsPerBlockPerMethod[methodIndex]; threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
#ifdef DEBUG
fprintf( stderr,"kCalculateGBVISoftcoreForces2 nonbondedMethod=%d bornForce2_blocks=%u threadsPerBlock=%u shMem=%u\n",
freeEnergyGpu->freeEnergySim.nonbondedMethod,
gpu->sim.bornForce2_blocks, threadsPerBlock, (sizeof(Atom)+sizeof(float3))*threadsPerBlock ); fflush( stderr );
int psize = 64;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
for( int ii = 0; ii < 32; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.0f;
pdE1->_pSysData[ii].z = 0.0f;
pdE1->_pSysData[ii].w = 0.0f;
pdE2->_pSysData[ii].x = 0.0f;
pdE2->_pSysData[ii].y = 0.0f;
pdE2->_pSysData[ii].z = 0.0f;
pdE2->_pSysData[ii].w = 0.0f;
}
pdE1->Upload();
pdE2->Upload();
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateGBVISoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
pdE1->Download();
pdE2->Download();
fprintf( stderr, "Pde\n" );
for( int ii = 0; ii < 32; ii++ ){
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
}
break;
#endif
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod) switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
......
...@@ -47,18 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1) ...@@ -47,18 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif #endif
void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit ) void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit )
{ {
//METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
//float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[blockDim.x]; float3* tempBuffer = (float3*) &sA[blockDim.x];
#endif #endif
...@@ -126,19 +122,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit ...@@ -126,19 +122,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit
dE = 0.0f; dE = 0.0f;
} }
/*
if( i == TARGET ){
pdE1[x+j].x = dE;
pdE1[x+j].y = psA[j].bornRadiusScaleFactor;
pdE1[x+j].z = r;
pdE1[x+j].w = dE1;
}
if( (x+j) == TARGET ){
pdE2[i].x = dE;
pdE2[i].y = psA[j].bornRadiusScaleFactor;
pdE2[i].z = r;
pdE2[i].w = psA[j].sr-ar.x;
}*/
float d = dx * dE; float d = dx * dE;
af.x -= d; af.x -= d;
psA[j].fx += d; psA[j].fx += d;
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <cudatypes.h> #include <cudatypes.h>
#include "kSoftcoreLJ.h" #include "kSoftcoreLJ.h"
#define PARAMETER_PRINT 1 #define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10 #define MAX_PARAMETER_PRINT 10
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
......
...@@ -117,9 +117,6 @@ void kCalculateCDLJSoftcoreForces( freeEnergyGpuContext freeEnergyGpu ) ...@@ -117,9 +117,6 @@ void kCalculateCDLJSoftcoreForces( freeEnergyGpuContext freeEnergyGpu )
{ {
gpuContext gpu = freeEnergyGpu->gpuContext; gpuContext gpu = freeEnergyGpu->gpuContext;
// (void) fprintf( stderr,"kCalculateCDLJCutoffForces %d warp=%u nonbond_blocks=%u nonbond_threads_per_block=%u rfK=%15.7e rfC=%15.7e\n", freeEnergyGpu->freeEnergySim.nonbondedMethod,
// gpu->bOutputBufferPerWarp, gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.reactionFieldK, gpu->sim.reactionFieldC); fflush( stderr );
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod) switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#define PARAMETER_PRINT 0 #define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10 #define MAX_PARAMETER_PRINT 10
//#define DEBUG
struct Atom { struct Atom {
float x; float x;
...@@ -211,7 +210,6 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel() ...@@ -211,7 +210,6 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel()
// Now calculate Born radius and OBC term. // Now calculate Born radius and OBC term.
sum *= 0.5f * atom.x; sum *= 0.5f * atom.x;
sum *= gbsaSimDev.pNonPolarScalingFactors[pos];
float sum2 = sum * sum; float sum2 = sum * sum;
float sum3 = sum * sum2; float sum3 = sum * sum2;
float tanhSum = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3); float tanhSum = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3);
...@@ -377,31 +375,6 @@ void kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu ) ...@@ -377,31 +375,6 @@ void kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
// printf("kCalculateObcGbsaSoftcoreBornSum\n"); // printf("kCalculateObcGbsaSoftcoreBornSum\n");
gpuContext gpu = freeEnergyGpu->gpuContext; gpuContext gpu = freeEnergyGpu->gpuContext;
#ifdef DEBUG
fprintf( stderr, "kCalculateObcGbsaSoftcoreBornSum cutoff=%15.7e\n", gpu->sim.nonbondedCutoffSqr );
int psize = gpu->sim.paddedNumberOfAtoms;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
float bF;
float bF1;
showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
for( int ii = 0; ii < psize; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.001f;
pdE1->_pSysData[ii].z = 0.001f;
pdE1->_pSysData[ii].w = 0.001f;
pdE2->_pSysData[ii].x = 0.001f;
pdE2->_pSysData[ii].y = 0.001f;
pdE2->_pSysData[ii].z = 0.001f;
pdE2->_pSysData[ii].w = 0.001f;
}
pdE1->Upload();
pdE2->Upload();
#endif
kClearObcGbsaSoftcoreBornSum(gpu); kClearObcGbsaSoftcoreBornSum(gpu);
LAUNCHERROR("kClearBornSum from kCalculateObcGbsaSoftcoreBornSum"); LAUNCHERROR("kClearBornSum from kCalculateObcGbsaSoftcoreBornSum");
...@@ -409,22 +382,13 @@ pdE2->Upload(); ...@@ -409,22 +382,13 @@ pdE2->Upload();
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit); sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else else
kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit); sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
#endif
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
...@@ -437,14 +401,6 @@ pdE2->Upload(); ...@@ -437,14 +401,6 @@ pdE2->Upload();
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
...@@ -452,7 +408,6 @@ pdE2->Upload(); ...@@ -452,7 +408,6 @@ pdE2->Upload();
else else
kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#endif
break; break;
...@@ -466,16 +421,6 @@ pdE2->Upload(); ...@@ -466,16 +421,6 @@ pdE2->Upload();
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
...@@ -483,7 +428,7 @@ pdE2->Upload(); ...@@ -483,7 +428,7 @@ pdE2->Upload();
kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#endif
break; break;
default: default:
...@@ -492,23 +437,4 @@ pdE2->Upload(); ...@@ -492,23 +437,4 @@ pdE2->Upload();
} }
LAUNCHERROR("kCalculateObcGbsaSoftcoreBornSum"); LAUNCHERROR("kCalculateObcGbsaSoftcoreBornSum");
#ifdef DEBUG
pdE1->Download();
pdE2->Download();
//gpu->psBornRadii->Download();
//gpu->psObcData->Download();
fprintf( stderr, "bL Obc Cud\n" );
bF = 0.0;
bF1 = 0.0;
for( int ii = 0; ii < gpu->natoms; ii++ ){
bF1 += pdE1->_pSysData[ii].x;
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
bF += pdE1->_pSysData[ii].x;
}
fprintf( stderr, "bS Obc Cud %6d %15.7e %15.7e\n", TARGET, bF, bF1 );
#endif
} }
...@@ -41,11 +41,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) ...@@ -41,11 +41,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif #endif
#ifdef DEBUG
void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2)
#else
void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit) void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit)
#endif
{ {
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID; unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
...@@ -135,32 +131,6 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUn ...@@ -135,32 +131,6 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUn
term += 2.0f * ((1.0f / ar.x) - l_ij); term += 2.0f * ((1.0f / ar.x) - l_ij);
} }
apos.w += psA[j].polarScaleData*term; apos.w += psA[j].polarScaleData*term;
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 1.0f;
pdE2[tjj].x = r;
pdE2[tjj].y = l_ij;
pdE2[tjj].z = rj;
pdE2[tjj].w = 1.0f;
}
/*
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE2[tjj].x = sum;
pdE2[tjj].y = psA[jIdx].polarScaleData;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = -1.0f;
} */
#endif
} }
} }
} }
...@@ -254,37 +224,6 @@ pdE2[tjj].w = -1.0f; ...@@ -254,37 +224,6 @@ pdE2[tjj].w = -1.0f;
} }
apos.w += (scale*term); apos.w += (scale*term);
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 2.0f;
/*
pdE2[tjj].x = r;
pdE2[tjj].y = l_ij;
pdE2[tjj].z = rj;
pdE2[tjj].w = 2.0f;
*/
}
if( (y+jIdx) == TARGET ){
int tjj = i;
/*
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = -2.0f;
*/
pdE2[tjj].x = term;
pdE2[tjj].y = r;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = -2.0f;
}
#endif
} }
float rScaledRadiusI = r + ar.y; float rScaledRadiusI = r + ar.y;
if (psA[tj].r < rScaledRadiusI) if (psA[tj].r < rScaledRadiusI)
...@@ -307,25 +246,6 @@ pdE2[tjj].w = -2.0f; ...@@ -307,25 +246,6 @@ pdE2[tjj].w = -2.0f;
} }
psA[tj].sum += polarScaleDataI*term; psA[tj].sum += polarScaleDataI*term;
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 3.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE2[tjj].x = term;
pdE2[tjj].y = r;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = -3.0f;
}
#endif
} }
} }
tj = (tj - 1) & (GRID - 1); tj = (tj - 1) & (GRID - 1);
......
...@@ -44,14 +44,10 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -44,14 +44,10 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
//float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[blockDim.x]; float3* tempBuffer = (float3*) &sA[blockDim.x];
#endif #endif
...@@ -124,7 +120,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -124,7 +120,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
// Born Forces term // Born Forces term
float term = 0.125f * (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3 + float term = 0.125f * (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3 +
0.250f * t1 * r2Inverse; 0.250f * t1 * r2Inverse;
term *= psA[j].npScale*nonPolarScaleDataI; term *= psA[j].npScale;
float dE = fb * term; float dE = fb * term;
#if defined USE_CUTOFF #if defined USE_CUTOFF
...@@ -231,7 +227,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -231,7 +227,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
float term = 0.125f * float term = 0.125f *
(1.000f + psA[tj].sr * psA[tj].sr * r2Inverse) * t3J + (1.000f + psA[tj].sr * psA[tj].sr * r2Inverse) * t3J +
0.250f * t1J * r2Inverse; 0.250f * t1J * r2Inverse;
term *= psA[tj].npScale*nonPolarScaleDataI; term *= psA[tj].npScale;
float dE = fb * term; float dE = fb * term;
#if defined USE_CUTOFF #if defined USE_CUTOFF
...@@ -257,7 +253,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -257,7 +253,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
term = 0.125f * term = 0.125f *
(1.000f + sr2 * r2Inverse) * t3I + (1.000f + sr2 * r2Inverse) * t3I +
0.250f * t1I * r2Inverse; 0.250f * t1I * r2Inverse;
term *= psA[tj].npScale*nonPolarScaleDataI; term *= nonPolarScaleDataI;
dE = psA[tj].fb * term; dE = psA[tj].fb * term;
float rj = psA[tj].r; float rj = psA[tj].r;
...@@ -293,11 +289,11 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -293,11 +289,11 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
float dx = psA[j].x - apos.x; float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y; float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z; float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2); float r = sqrt(r2);
...@@ -327,16 +323,16 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -327,16 +323,16 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
float term = 0.125f * float term = 0.125f *
(1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3J + (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3J +
0.250f * t1J * r2Inverse; 0.250f * t1J * r2Inverse;
term *= psA[j].npScale*nonPolarScaleDataI; term *= psA[j].npScale;
float dE = fb * term; float dE = fb * term;
#if defined USE_PERIODIC #if defined USE_PERIODIC
if (a.x >= rScaledRadiusJ || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr) if (a.x >= rScaledRadiusJ || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF #elif defined USE_CUTOFF
if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr) if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr)
#else #else
if (a.x >= rScaledRadiusJ) if (a.x >= rScaledRadiusJ)
#endif #endif
{ {
dE = 0.0f; dE = 0.0f;
} }
...@@ -355,17 +351,17 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -355,17 +351,17 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
term = 0.125f * term = 0.125f *
(1.000f + sr2 * r2Inverse) * t3I + (1.000f + sr2 * r2Inverse) * t3I +
0.250f * t1I * r2Inverse; 0.250f * t1I * r2Inverse;
term *= psA[j].npScale*nonPolarScaleDataI; term *= nonPolarScaleDataI;
dE = psA[j].fb * term; dE = psA[j].fb * term;
float rj = psA[j].r; float rj = psA[j].r;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
if (rj >= rScaledRadiusI || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr) if (rj >= rScaledRadiusI || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF #elif defined USE_CUTOFF
if (rj >= rScaledRadiusI || r2 > cSim.nonbondedCutoffSqr) if (rj >= rScaledRadiusI || r2 > cSim.nonbondedCutoffSqr)
#else #else
if (rj >= rScaledRadiusI) if (rj >= rScaledRadiusI)
#endif #endif
{ {
dE = 0.0f; dE = 0.0f;
} }
......
...@@ -15,14 +15,12 @@ SET( INCLUDE_SERIALIZATION FALSE ) ...@@ -15,14 +15,12 @@ SET( INCLUDE_SERIALIZATION FALSE )
#SET( INCLUDE_SERIALIZATION TRUE ) #SET( INCLUDE_SERIALIZATION TRUE )
SET( SHARED_OPENMM_TARGET OpenMMFreeEnergy) SET( SHARED_OPENMM_TARGET OpenMMFreeEnergy)
SET( STATIC_OPENMM_TARGET OpenMMFreeEnergy_static)
SET( SHARED_CUDA_TARGET OpenMMCuda) SET( SHARED_CUDA_TARGET OpenMMCuda)
SET( STATIC_CUDA_TARGET OpenMMCuda_static)
IF( INCLUDE_SERIALIZATION ) IF( INCLUDE_SERIALIZATION )
INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include) INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include)
SET( SHARED_OPENMM_SERIALIZATION OpenMMSerialization ) SET( SHARED_OPENMM_SERIALIZATION "OpenMMSerialization" )
SET( SHARED_FREE_ENERGY_SERIALIZATION FreeEnergySerialization ) SET( SHARED_FREE_ENERGY_SERIALIZATION "FreeEnergySerialization" )
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...@@ -32,8 +30,6 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ...@@ -32,8 +30,6 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_OPENMM_SERIALIZATION ${SHARED_OPENMM_SERIALIZATION}_d) SET(SHARED_OPENMM_SERIALIZATION ${SHARED_OPENMM_SERIALIZATION}_d)
SET(SHARED_FREE_ENERGY_SERIALIZATION ${SHARED_FREE_ENERGY_SERIALIZATION}_d) SET(SHARED_FREE_ENERGY_SERIALIZATION ${SHARED_FREE_ENERGY_SERIALIZATION}_d)
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
SET(STATIC_CUDA_TARGET ${STATIC_CUDA_TARGET}_d)
SET(STATIC_OPENMM_TARGET ${STATIC_OPENMM_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# Automatically create tests using files named "Test*.cpp" # Automatically create tests using files named "Test*.cpp"
...@@ -54,13 +50,52 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -54,13 +50,52 @@ FOREACH(TEST_PROG ${TEST_PROGS})
SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE") SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE")
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
IF( ${TEST_ROOT} STREQUAL "TestCudaOBCSoftcoreForce" ) IF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce2" )
SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
ENDIF( ${TEST_ROOT} STREQUAL "TestCudaOBCSoftcoreForce" ) # serialize
SET(DEFINE_STRING "-DTEST_PLATFORM=0 ")
IF( INCLUDE_SERIALIZATION )
SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE ")
SET(TARGET_LINK_LIBRARIES_STRING "${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION}")
ELSE( INCLUDE_SERIALIZATION )
SET(TARGET_LINK_LIBRARIES_STRING "${SHARED_TARGET}")
ENDIF( INCLUDE_SERIALIZATION )
# obc
SET(OBC_DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
SET(OBC_TEST "TestCudaGBSAOBCSoftcoreForce2")
CUDA_ADD_EXECUTABLE(${OBC_TEST} ${TEST_PROG})
IF( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${OBC_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
ELSE( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${OBC_TEST} ${SHARED_TARGET})
ENDIF( INCLUDE_SERIALIZATION )
SET_TARGET_PROPERTIES(${OBC_TEST} PROPERTIES COMPILE_FLAGS ${OBC_DEFINE_STRING} )
ADD_TEST(${OBC_TEST} ${EXECUTABLE_OUTPUT_PATH}/${OBC_TEST})
# nonbond
SET(NONBOND_DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=0")
SET(NONBOND_TEST "TestCudaNonbondSoftcoreForce2")
CUDA_ADD_EXECUTABLE(${NONBOND_TEST} ${TEST_PROG})
IF( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
ELSE( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET})
ENDIF( INCLUDE_SERIALIZATION )
SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES COMPILE_FLAGS ${NONBOND_DEFINE_STRING} )
ADD_TEST(${NONBOND_TEST} ${EXECUTABLE_OUTPUT_PATH}/${NONBOND_TEST})
# gbvi
IF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce" )
SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=2") SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=2")
ENDIF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce" ) SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} )
ENDIF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce2" )
#MESSAGE( "${TEST_ROOT} ${DEFINE_STRING}" ) #MESSAGE( "${TEST_ROOT} ${DEFINE_STRING}" )
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} ) SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} )
......
...@@ -221,7 +221,7 @@ void CpuObcSoftcore::computeBornRadii( const vector<RealVec>& atomCoordinates, ...@@ -221,7 +221,7 @@ void CpuObcSoftcore::computeBornRadii( const vector<RealVec>& atomCoordinates,
// OBC-specific code (Eqs. 6-8 in paper) // OBC-specific code (Eqs. 6-8 in paper)
sum *= nonPolarScaleFactors[atomI]*half*offsetRadiusI; sum *= half*offsetRadiusI;
RealOpenMM sum2 = sum*sum; RealOpenMM sum2 = sum*sum;
RealOpenMM sum3 = sum*sum2; RealOpenMM sum3 = sum*sum2;
RealOpenMM tanhSum = TANH( alphaObc*sum - betaObc*sum2 + gammaObc*sum3 ); RealOpenMM tanhSum = TANH( alphaObc*sum - betaObc*sum2 + gammaObc*sum3 );
...@@ -486,7 +486,7 @@ RealOpenMM CpuObcSoftcore::computeBornEnergyForces( vector<RealVec>& atomCoordin ...@@ -486,7 +486,7 @@ RealOpenMM CpuObcSoftcore::computeBornEnergyForces( vector<RealVec>& atomCoordin
RealOpenMM r2Inverse = rInverse*rInverse; RealOpenMM r2Inverse = rInverse*rInverse;
RealOpenMM t3 = eighth*(one + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + fourth*LN( u_ij/l_ij )*r2Inverse; RealOpenMM t3 = eighth*(one + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + fourth*LN( u_ij/l_ij )*r2Inverse;
t3 *= nonPolarScaleFactors[atomI]*nonPolarScaleFactors[atomJ]; t3 *= nonPolarScaleFactors[atomJ];
RealOpenMM de = bornForces[atomI]*t3*rInverse; RealOpenMM de = bornForces[atomI]*t3*rInverse;
......
...@@ -77,7 +77,7 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -77,7 +77,7 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric()); custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric());
custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric()); custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric());
custom->addComputedValue("I", "lambda1*lambda2*step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);" custom->addComputedValue("I", "lambda2*step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);"
"U=r+sr2;" "U=r+sr2;"
"C=2*(1/or1-1/L)*step(sr2-r-or1);" "C=2*(1/or1-1/L)*step(sr2-r-or1);"
"L=max(or1, D);" "L=max(or1, D);"
...@@ -86,8 +86,8 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -86,8 +86,8 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
"or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions); "or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions);
custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);" custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);"
"psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle); "psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle);
custom->addEnergyTerm("lambda*28.3919551*(radius+0.14)^2*(radius/B)^6-lambda*lambda*0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle); custom->addEnergyTerm("lambda*28.3919551*(radius+0.14)^2*(radius/B)^6-0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
custom->addEnergyTerm("-138.935485*lambda1*lambda2*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;" custom->addEnergyTerm("-138.935485*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
"f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions); "f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions);
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
...@@ -103,14 +103,14 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -103,14 +103,14 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
obc->addParticle( charge*lambda1, 0.2, 0.5, lambda1); obc->addParticle( charge*lambda1, 0.2, 0.5, lambda1);
obc->addParticle(-charge*lambda1, 0.1, 0.5, lambda1); obc->addParticle(-charge*lambda1, 0.1, 0.5, lambda1);
params[0] = charge; params[0] = charge*lambda1;
params[1] = 0.2; params[1] = 0.2;
params[2] = 0.5; params[2] = 0.5;
params[3] = lambda1; params[3] = lambda1;
custom->addParticle(params); custom->addParticle(params);
params[0] = -charge; params[0] = -charge*lambda1;
params[1] = 0.1; params[1] = 0.1;
custom->addParticle(params); custom->addParticle(params);
...@@ -119,13 +119,13 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -119,13 +119,13 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
obc->addParticle( charge*lambda2, 0.2, 0.8, lambda2); obc->addParticle( charge*lambda2, 0.2, 0.8, lambda2);
obc->addParticle(-charge*lambda2, 0.1, 0.8, lambda2); obc->addParticle(-charge*lambda2, 0.1, 0.8, lambda2);
params[0] = charge; params[0] = charge*lambda2;
params[1] = 0.2; params[1] = 0.2;
params[2] = 0.8; params[2] = 0.8;
params[3] = lambda2; params[3] = lambda2;
custom->addParticle(params); custom->addParticle(params);
params[0] = -charge; params[0] = -charge*lambda2;
params[1] = 0.1; params[1] = 0.1;
custom->addParticle(params); custom->addParticle(params);
} }
......
...@@ -59,10 +59,7 @@ ...@@ -59,10 +59,7 @@
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <iostream>
#include <cstdio> #include <cstdio>
#include <vector>
#include <typeinfo> #include <typeinfo>
extern "C" void registerFreeEnergyCudaKernelFactories(); extern "C" void registerFreeEnergyCudaKernelFactories();
...@@ -1322,29 +1319,27 @@ static NonbondedForce* copyNonbondedForce( const NonbondedForce& nonbondedForce ...@@ -1322,29 +1319,27 @@ static NonbondedForce* copyNonbondedForce( const NonbondedForce& nonbondedForce
* *
*/ */
static System* copySystem( const System& inputSystem ){ static void copySystem( const System& inputSystem, System& systemCopy ){
System* systemCopy = new System();
for( unsigned int ii = 0; ii < inputSystem.getNumParticles(); ii++ ){ for( unsigned int ii = 0; ii < inputSystem.getNumParticles(); ii++ ){
systemCopy->addParticle( inputSystem.getParticleMass( static_cast<int>(ii) ) ); systemCopy.addParticle( inputSystem.getParticleMass( static_cast<int>(ii) ) );
} }
Vec3 a; Vec3 a;
Vec3 b; Vec3 b;
Vec3 c; Vec3 c;
inputSystem.getDefaultPeriodicBoxVectors( a, b, c ); inputSystem.getDefaultPeriodicBoxVectors( a, b, c );
systemCopy->setDefaultPeriodicBoxVectors( a, b, c ); systemCopy.setDefaultPeriodicBoxVectors( a, b, c );
for( unsigned int ii = 0; ii < inputSystem.getNumConstraints(); ii++ ){ for( unsigned int ii = 0; ii < inputSystem.getNumConstraints(); ii++ ){
int index; int index;
int particle1, particle2; int particle1, particle2;
double distance; double distance;
inputSystem.getConstraintParameters( ii, particle1, particle2, distance); inputSystem.getConstraintParameters( ii, particle1, particle2, distance);
systemCopy->addConstraint( particle1, particle2, distance); systemCopy.addConstraint( particle1, particle2, distance);
} }
return systemCopy; return;
} }
/** /**
...@@ -1809,8 +1804,9 @@ void runSystemComparisonTest( System& system1, System& system2, ...@@ -1809,8 +1804,9 @@ void runSystemComparisonTest( System& system1, System& system2,
(void) fprintf( log, "System1: particles=%d forces=%d System2: particles=%d forces=%d\n", (void) fprintf( log, "System1: particles=%d forces=%d System2: particles=%d forces=%d\n",
system1.getNumParticles(), system1.getNumForces(), system1.getNumParticles(), system1.getNumForces(),
system2.getNumParticles(), system2.getNumForces() ); system2.getNumParticles(), system2.getNumForces() );
(void) fprintf( log, "Positions=%u\n", (void) fprintf( log, "Positions=%u\n", static_cast<unsigned int>(positions.size()) );
static_cast<unsigned int>(positions.size()) ); (void) fprintf( log, "Platform1=%s Platform2=%s\n", platform1.c_str(), platform2.c_str() );
(void) fprintf( log, "relativeTolerance=%8.2e applyAssert=%d\n", relativeTolerance, applyAssert );
MapStringInt stringForceVector1; MapStringInt stringForceVector1;
MapStringInt stringForceVector2; MapStringInt stringForceVector2;
...@@ -1836,7 +1832,7 @@ void runSystemComparisonTest( System& system1, System& system2, ...@@ -1836,7 +1832,7 @@ void runSystemComparisonTest( System& system1, System& system2,
if( system1.getNumParticles() != static_cast<int>(positions.size()) ){ if( system1.getNumParticles() != static_cast<int>(positions.size()) ){
std::stringstream msg; std::stringstream msg;
msg << "Number of partciles for system des not equal size of position array: " << system1.getNumParticles() << " != " << positions.size(); msg << "Number of particles for system does not equal size of position array: " << system1.getNumParticles() << " != " << positions.size();
throw OpenMMException( msg.str() ); throw OpenMMException( msg.str() );
} }
...@@ -1844,7 +1840,7 @@ void runSystemComparisonTest( System& system1, System& system2, ...@@ -1844,7 +1840,7 @@ void runSystemComparisonTest( System& system1, System& system2,
context1.setPositions(positions); context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy); State state1 = context1.getState(State::Forces | State::Energy);
Context context2( system2, integrator2, Platform::getPlatformByName( "Reference")); Context context2( system2, integrator2, Platform::getPlatformByName( platform2 ));
context2.setPositions(positions); context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy); State state2 = context2.getState(State::Forces | State::Energy);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment