"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/torchani.git" did not exist on "2cb357bd16ef196d1b84106b3a42cd04e8dab9fd"
Commit 032e18de authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Removed debug code

In Born radii calculation for particle i, removed factor of lambda associated w/ particle i
parent 076eb95e
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
#include <sstream> #include <sstream>
#define USE_SOFTCORE_LJ #define USE_SOFTCORE_LJ
//#define DEBUG
struct Atom { struct Atom {
float x; float x;
...@@ -134,52 +133,11 @@ void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext freeEnergyGpu ) ...@@ -134,52 +133,11 @@ void kCalculateCDLJObcGbsaSoftcoreForces1( freeEnergyGpuContext freeEnergyGpu )
} }
threadsPerBlock = threadsPerBlockPerMethod[methodIndex]; threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
#ifdef DEBUG
fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 blks=%u thread/block=%u %u shMem=%u nbMethod==%d warp=%u\n",
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->sim.nonbond_threads_per_block, sizeof(Atom)*threadsPerBlock,
freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp);
int psize = gpu->sim.paddedNumberOfAtoms;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
float bF,bR;
float bF1,b2;
float ratio;
float atomicRadii;
showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
for( int ii = 0; ii < psize; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.001f;
pdE1->_pSysData[ii].z = 0.001f;
pdE1->_pSysData[ii].w = 0.001f;
pdE2->_pSysData[ii].x = 0.0f;
pdE2->_pSysData[ii].y = 0.001f;
pdE2->_pSysData[ii].z = 0.001f;
pdE2->_pSysData[ii].w = 0.001f;
}
pdE1->Upload();
pdE2->Upload();
#endif
switch( freeEnergyGpu->freeEnergySim.nonbondedMethod ) switch( freeEnergyGpu->freeEnergySim.nonbondedMethod )
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
// use softcore LJ potential // use softcore LJ potential
#ifdef DEBUG
(void) fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreForces1 ver=%u blks=%u threadsPerBlock=%u shMem=%u %u wrp=%u\n", gpu->sm_version,
gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(Atom)*threadsPerBlock, gpu->sharedMemoryPerBlock, gpu->bOutputBufferPerWarp ); fflush( stderr );
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit,pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit ); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit );
...@@ -187,33 +145,17 @@ pdE2->Upload(); ...@@ -187,33 +145,17 @@ pdE2->Upload();
kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreN2Forces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit ); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit );
#endif
LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreForces1");
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
#ifdef DEBUG
(void) fprintf( stderr, "kCalculateCDLJObcGbsaSoftcoreCutoffForces1 %6d blks=%u nonbond_threads_per_block=%5u shMem=%5u\n",
gpu->natoms, gpu->sim.nonbond_blocks, threadsPerBlock, (sizeof(Atom)+sizeof(float))*threadsPerBlock);
(void) fflush( stderr );
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
else else
kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcoreCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
#endif
LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreCutoffForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaSoftcoreCutoffForces1");
...@@ -221,85 +163,15 @@ pdE2->Upload(); ...@@ -221,85 +163,15 @@ pdE2->Upload();
case FREE_ENERGY_PERIODIC: case FREE_ENERGY_PERIODIC:
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcorePeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateCDLJObcGbsaSoftcorePeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
#endif
LAUNCHERROR("kCalculateCDLJObcGbsaSoftcorePeriodicForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaSoftcorePeriodicForces1");
break; break;
} }
#ifdef DEBUG
/*
gpu->psBornForce->Download();
freeEnergyGpu->psSwitchDerivative->Download();
gpu->psBornRadii->Download();
fprintf( stderr, "XX bR=%15.7e swd=%15.7e\n", gpu->psBornRadii->_pSysData[0], freeEnergyGpu->psSwitchDerivative->_pSysData[0] );
for( int ii = 0; ii < gpu->sim.nonbondOutputBuffers; ii++ ){
fprintf( stderr, "strx %4d %15.7e %15.7e %15.7e %15.7e\n", ii,
gpu->psBornForce->_pSysStream[ii][0],
gpu->psBornForce->_pSysStream[ii][1],
gpu->psBornForce->_pSysStream[ii][2],
gpu->psBornForce->_pSysStream[ii][3] );
if( gpu->natoms > 1984 ){
int idx = 1983;
fprintf( stderr, "stry %4d %15.7e %15.7e %15.7e %15.7e %5d\n", ii,
gpu->psBornForce->_pSysStream[ii][idx+0],
gpu->psBornForce->_pSysStream[ii][idx+1],
gpu->psBornForce->_pSysStream[ii][idx+2],
gpu->psBornForce->_pSysStream[ii][idx+3], idx );
}
}
int bufferI = 62;
if( bufferI < gpu->sim.nonbondOutputBuffers ){
fprintf( stderr, "BufferI %4d \n", bufferI );
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
fprintf( stderr, "strz %4d %15.7e\n", ii, gpu->psBornForce->_pSysStream[bufferI][ii] );
}
}
*/
pdE1->Download();
pdE2->Download();
gpu->psPosq4->Download();
gpu->psGBVIData->Download();
gpu->psBornRadii->Download();
gpu->psBornForce->Download();
freeEnergyGpu->psSwitchDerivative->Download();
fprintf( stderr, "PdeCud %d\n", TARGET );
bF = 0.0;
int count =0;
for( int ii = 0; ii < psize; ii++ ){
bF += pdE1->_pSysData[ii].x;
if( fabs( pdE1->_pSysData[ii].w ) > 1.0e-03 && fabs( pdE1->_pSysData[ii].x ) > 0.0 ){
count++;
fprintf( stderr, "%4d %4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e bF=%15.7e swd=%15.7e\n", count, ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w, gpu->psBornForce->_pSysData[ii],
freeEnergyGpu->psSwitchDerivative->_pSysData[ii] );
}
}
bR = gpu->psBornRadii->_pSysData[TARGET];
atomicRadii = gpu->psGBVIData->_pSysData[TARGET].x;
ratio = (atomicRadii/bR);
bF1 = bF + (3.0f*gpu->psGBVIData->_pSysData[TARGET].z*ratio*ratio*ratio)/bR;
b2 = bR*bR;
bF1 *= (1.0f/3.0f)*b2*b2;
fprintf( stderr, "sumbF Cud %6d count=%d %15.7e %15.7e %15.7e\n", TARGET, count, bF, bF1, bR);
#endif
} }
...@@ -36,9 +36,6 @@ ...@@ -36,9 +36,6 @@
#include "kSoftcoreLJ.h" #include "kSoftcoreLJ.h"
#endif #endif
#undef TARGET
#define TARGET 1926
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
...@@ -57,15 +54,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -57,15 +54,12 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
float CDLJObcGbsa_energy; float CDLJObcGbsa_energy;
float energy = 0.0f; float energy = 0.0f;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
//float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
float* tempBuffer = (float*) &sA[blockDim.x]; float* tempBuffer = (float*) &sA[blockDim.x];
#endif #endif
...@@ -171,24 +165,6 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo ...@@ -171,24 +165,6 @@ void METHOD_NAME(kCalculateCDLJObcGbsaSoftcore, Forces1_kernel)(unsigned int* wo
dGpol_dalpha2_ij = 0.0f; dGpol_dalpha2_ij = 0.0f;
} }
af.w += dGpol_dalpha2_ij * psA[j].br; af.w += dGpol_dalpha2_ij * psA[j].br;
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 1.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -1.0f;
}
#endif
energy += 0.5f*CDLJObcGbsa_energy; energy += 0.5f*CDLJObcGbsa_energy;
// Add Forces // Add Forces
...@@ -278,22 +254,6 @@ pdE1[tjj].w = -1.0f; ...@@ -278,22 +254,6 @@ pdE1[tjj].w = -1.0f;
dGpol_dalpha2_ij = 0.0f; dGpol_dalpha2_ij = 0.0f;
} }
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = (y+jIdx);
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 2.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -2.0f;
}
#endif
af.w += dGpol_dalpha2_ij * psA[j].br; af.w += dGpol_dalpha2_ij * psA[j].br;
energy += 0.5f*CDLJObcGbsa_energy; energy += 0.5f*CDLJObcGbsa_energy;
...@@ -428,26 +388,6 @@ pdE1[tjj].w = -2.0f; ...@@ -428,26 +388,6 @@ pdE1[tjj].w = -2.0f;
af.w += dGpol_dalpha2_ij * psA[tj].br; af.w += dGpol_dalpha2_ij * psA[tj].br;
energy += CDLJObcGbsa_energy; energy += CDLJObcGbsa_energy;
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[jIdx].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 3.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -3.0f;
}
#endif
// Add forces // Add forces
dx *= dEdR; dx *= dEdR;
...@@ -545,24 +485,6 @@ pdE1[tjj].w = -3.0f; ...@@ -545,24 +485,6 @@ pdE1[tjj].w = -3.0f;
if (tgx == 0) if (tgx == 0)
psA[j].fb += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16]; psA[j].fb += tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[j].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = 4.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = tempBuffer[threadIdx.x] + tempBuffer[threadIdx.x+16];
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].w = -4.0f;
}
#endif
energy += CDLJObcGbsa_energy; energy += CDLJObcGbsa_energy;
// Add forces // Add forces
...@@ -678,26 +600,6 @@ pdE1[tjj].w = -4.0f; ...@@ -678,26 +600,6 @@ pdE1[tjj].w = -4.0f;
dGpol_dalpha2_ij = 0.0f; dGpol_dalpha2_ij = 0.0f;
} }
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = dGpol_dalpha2_ij * psA[tj].br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].z = dGpol_dalpha2_ij;
pdE1[tjj].w = 6.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = dGpol_dalpha2_ij * br;
pdE1[tjj].y = sqrt(r2);
pdE1[tjj].z = dGpol_dalpha2_ij;
pdE1[tjj].w = -6.0f;
}
#endif
af.w += dGpol_dalpha2_ij * psA[tj].br; af.w += dGpol_dalpha2_ij * psA[tj].br;
psA[tj].fb += dGpol_dalpha2_ij * br; psA[tj].fb += dGpol_dalpha2_ij * br;
energy += CDLJObcGbsa_energy; energy += CDLJObcGbsa_energy;
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
#define PARAMETER_PRINT 0 #define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10 #define MAX_PARAMETER_PRINT 10
//#define DEBUG
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaFreeEnergyGmxSimulation gbviSimDev; static __constant__ cudaFreeEnergyGmxSimulation gbviSimDev;
...@@ -525,35 +524,6 @@ void kCalculateGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu ) ...@@ -525,35 +524,6 @@ void kCalculateGBVISoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
} }
threadsPerBlock = threadsPerBlockPerMethod[methodIndex]; threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
#ifdef DEBUG
fprintf( stderr, "kCalculateGBVISoftcoreBornSum blks=%u threadsPerBlock=%u %u shMem=%u\n",
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->sim.nonbond_threads_per_block, (sizeof(Atom)+sizeof(float))*threadsPerBlock ); fflush( stderr );
int psize = gpu->sim.paddedNumberOfAtoms;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
float bF;
float bF1;
showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
for( int ii = 0; ii < psize; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.001f;
pdE1->_pSysData[ii].z = 0.001f;
pdE1->_pSysData[ii].w = 0.001f;
pdE2->_pSysData[ii].x = 0.001f;
pdE2->_pSysData[ii].y = 0.001f;
pdE2->_pSysData[ii].z = 0.001f;
pdE2->_pSysData[ii].w = 0.001f;
}
pdE1->Upload();
pdE2->Upload();
#endif
kClearGBVISoftcoreBornSum( gpu ); kClearGBVISoftcoreBornSum( gpu );
LAUNCHERROR("kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum"); LAUNCHERROR("kClearGBVIBornSum from kCalculateGBVISoftcoreBornSum");
...@@ -561,15 +531,6 @@ pdE2->Upload(); ...@@ -561,15 +531,6 @@ pdE2->Upload();
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp){
kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
} else {
kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
}
#else
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
...@@ -577,8 +538,6 @@ pdE2->Upload(); ...@@ -577,8 +538,6 @@ pdE2->Upload();
kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
} }
#endif
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
...@@ -591,27 +550,13 @@ pdE2->Upload(); ...@@ -591,27 +550,13 @@ pdE2->Upload();
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
(void) fprintf( stderr, "kCalculateGBVISoftcoreBornSum cutoff=%15.7e warp=%u GridBoundingBox.length=%u interaction_blocks=%u interaction_threads_per_block=%u nonbond_blocks=%u nonbond_threads_per_block=%u\n",
gpu->sim.nonbondedCutoffSqr, gpu->bOutputBufferPerWarp, gpu->psGridBoundingBox->_length, gpu->sim.interaction_blocks,
gpu->sim.interaction_threads_per_block, gpu->sim.nonbond_blocks, threadsPerBlock ); fflush( stderr );
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
#endif
break; break;
case FREE_ENERGY_PERIODIC: case FREE_ENERGY_PERIODIC:
...@@ -624,21 +569,13 @@ pdE2->Upload(); ...@@ -624,21 +569,13 @@ pdE2->Upload();
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
else
kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData );
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
else else
kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, kCalculateGBVISoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit ); (sizeof(Atom)+sizeof(float))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit );
#endif
break; break;
default: default:
...@@ -647,23 +584,4 @@ pdE2->Upload(); ...@@ -647,23 +584,4 @@ pdE2->Upload();
} }
LAUNCHERROR("kCalculateGBVISoftcoreBornSum"); LAUNCHERROR("kCalculateGBVISoftcoreBornSum");
#ifdef DEBUG
pdE1->Download();
pdE2->Download();
fprintf( stderr, "bSum Cud method=%u warp=%u\n", freeEnergyGpu->freeEnergySim.nonbondedMethod, gpu->bOutputBufferPerWarp );
bF = 0.0;
bF1 = 0.0;
for( int ii = 0; ii < gpu->natoms; ii++ ){
if( fabsf( pdE1->_pSysData[ii].w ) > 0.002 ){
bF1 += pdE1->_pSysData[ii].x;
if( fabsf( pdE1->_pSysData[ii].x ) > 0.001 ){
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
}
}
bF += pdE1->_pSysData[ii].x;
}
fprintf( stderr, "bSum Cud %6d %15.7e %15.7e\n", TARGET, bF, bF1 );
#endif
} }
...@@ -37,9 +37,6 @@ ...@@ -37,9 +37,6 @@
#include "kCalculateGBVIAux.h" #include "kCalculateGBVIAux.h"
#undef TARGET
//#define TARGET 39
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
...@@ -48,11 +45,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) ...@@ -48,11 +45,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif #endif
#ifdef DEBUG
void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
#else
void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit) void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
#endif
{ {
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
...@@ -119,28 +112,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit) ...@@ -119,28 +112,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, BornSum_kernel)(unsigned int* workUnit)
{ {
bSum += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr ); bSum += psA[j].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[j].sr );
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 1.0f;
pdE2[tjj].x = sqrt(r2);
pdE2[tjj].y = psA[jIdx].sr;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = 1.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( sqrt(r2), ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = -1.0f;
}
#endif
} }
} }
...@@ -214,32 +185,6 @@ pdE1[tjj].w = -1.0f; ...@@ -214,32 +185,6 @@ pdE1[tjj].w = -1.0f;
apos.w += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr ); apos.w += psA[tj].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[tj].sr );
psA[tj].sum += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y ); psA[tj].sum += ar.w*getGBVI_Volume( r, psA[tj].r, ar.y );
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = psA[jIdx].bornRadiusScaleFactor*getGBVI_Volume( r, ar.x, psA[jIdx].sr );
pdE1[tjj].y = psA[jIdx].bornRadiusScaleFactor;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 2.0f;
float R = ar.x;
float S = psA[tj].sr;
pdE2[tjj].x = getGBVI_L( r, (r + S), S );
pdE2[tjj].y = -getGBVI_L( r, (r - S), S );
pdE2[tjj].z = -getGBVI_L( r, R, S );
pdE2[tjj].w = (1.0f/(R*R*R));
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE1[tjj].x = ar.w*getGBVI_Volume( r, psA[jIdx].r, ar.y );
pdE1[tjj].y = ar.w;
pdE1[tjj].z = psA[jIdx].r;
pdE1[tjj].w = -2.0f;
}
#endif
} }
tj = (tj - 1) & (GRID - 1); tj = (tj - 1) & (GRID - 1);
} }
......
...@@ -68,66 +68,6 @@ void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu) ...@@ -68,66 +68,6 @@ void SetCalculateGBVISoftcoreForces2Sim( freeEnergyGpuContext gpu)
#include "kCalculateGBVIAux.h" #include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void kCalculateGBVISoftcoreForces2a_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
if( pos >= cSim.atoms )return;
float4 apos = cSim.pPosq[pos];
float4 ar = cSim.pGBVIData[pos];
float fb = cSim.pBornForce[pos];
unsigned int posJ = 0;
float4 force;
force.x = force.y = force.z = force.w = 0.0f;
while ( posJ < cSim.atoms )
{
float4 aposJ = cSim.pPosq[posJ];
float4 arJ = cSim.pGBVIData[posJ];
float fbJ = cSim.pBornForce[posJ];
float dx = aposJ.x - apos.x;
float dy = aposJ.y - apos.y;
float dz = aposJ.z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float dE = getGBVI_dE2( r, ar.x, arJ.y, fb );
dE = r > 1.0e-08f ? dE : 0.0f;
//dx = dy = dz = 1.0f;
float d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#if 1
dE = getGBVI_dE2( r, arJ.x, ar.y, fbJ );
dE = r > 1.0e-08f ? dE : 0.0f;
d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#endif
posJ += 1;
}
// Write results
cSim.pForce4[pos] = force;
}
// Include versions of the kernels for N^2 calculations. // Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b #define METHOD_NAME(a, b) a##N2##b
...@@ -180,46 +120,6 @@ void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext freeEnergyGpu ) ...@@ -180,46 +120,6 @@ void kCalculateGBVISoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
} }
threadsPerBlock = threadsPerBlockPerMethod[methodIndex]; threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
#ifdef DEBUG
fprintf( stderr,"kCalculateGBVISoftcoreForces2 nonbondedMethod=%d bornForce2_blocks=%u threadsPerBlock=%u shMem=%u\n",
freeEnergyGpu->freeEnergySim.nonbondedMethod,
gpu->sim.bornForce2_blocks, threadsPerBlock, (sizeof(Atom)+sizeof(float3))*threadsPerBlock ); fflush( stderr );
int psize = 64;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
for( int ii = 0; ii < 32; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.0f;
pdE1->_pSysData[ii].z = 0.0f;
pdE1->_pSysData[ii].w = 0.0f;
pdE2->_pSysData[ii].x = 0.0f;
pdE2->_pSysData[ii].y = 0.0f;
pdE2->_pSysData[ii].z = 0.0f;
pdE2->_pSysData[ii].w = 0.0f;
}
pdE1->Upload();
pdE2->Upload();
if (gpu->bOutputBufferPerWarp)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateGBVISoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits, pdE1->_pDevData, pdE2->_pDevData);
pdE1->Download();
pdE2->Download();
fprintf( stderr, "Pde\n" );
for( int ii = 0; ii < 32; ii++ ){
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
}
break;
#endif
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod) switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
......
...@@ -47,18 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1) ...@@ -47,18 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif #endif
void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit ) void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit )
{ {
//METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2 )
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
//float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[blockDim.x]; float3* tempBuffer = (float3*) &sA[blockDim.x];
#endif #endif
...@@ -126,19 +122,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit ...@@ -126,19 +122,6 @@ void METHOD_NAME(kCalculateGBVISoftcore, Forces2_kernel)(unsigned int* workUnit
dE = 0.0f; dE = 0.0f;
} }
/*
if( i == TARGET ){
pdE1[x+j].x = dE;
pdE1[x+j].y = psA[j].bornRadiusScaleFactor;
pdE1[x+j].z = r;
pdE1[x+j].w = dE1;
}
if( (x+j) == TARGET ){
pdE2[i].x = dE;
pdE2[i].y = psA[j].bornRadiusScaleFactor;
pdE2[i].z = r;
pdE2[i].w = psA[j].sr-ar.x;
}*/
float d = dx * dE; float d = dx * dE;
af.x -= d; af.x -= d;
psA[j].fx += d; psA[j].fx += d;
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#include <cudatypes.h> #include <cudatypes.h>
#include "kSoftcoreLJ.h" #include "kSoftcoreLJ.h"
#define PARAMETER_PRINT 1 #define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10 #define MAX_PARAMETER_PRINT 10
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
......
...@@ -117,9 +117,6 @@ void kCalculateCDLJSoftcoreForces( freeEnergyGpuContext freeEnergyGpu ) ...@@ -117,9 +117,6 @@ void kCalculateCDLJSoftcoreForces( freeEnergyGpuContext freeEnergyGpu )
{ {
gpuContext gpu = freeEnergyGpu->gpuContext; gpuContext gpu = freeEnergyGpu->gpuContext;
// (void) fprintf( stderr,"kCalculateCDLJCutoffForces %d warp=%u nonbond_blocks=%u nonbond_threads_per_block=%u rfK=%15.7e rfC=%15.7e\n", freeEnergyGpu->freeEnergySim.nonbondedMethod,
// gpu->bOutputBufferPerWarp, gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.reactionFieldK, gpu->sim.reactionFieldC); fflush( stderr );
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod) switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
......
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#define PARAMETER_PRINT 0 #define PARAMETER_PRINT 0
#define MAX_PARAMETER_PRINT 10 #define MAX_PARAMETER_PRINT 10
//#define DEBUG
struct Atom { struct Atom {
float x; float x;
...@@ -211,7 +210,6 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel() ...@@ -211,7 +210,6 @@ __global__ void kReduceObcGbsaSoftcoreBornSum_kernel()
// Now calculate Born radius and OBC term. // Now calculate Born radius and OBC term.
sum *= 0.5f * atom.x; sum *= 0.5f * atom.x;
sum *= gbsaSimDev.pNonPolarScalingFactors[pos];
float sum2 = sum * sum; float sum2 = sum * sum;
float sum3 = sum * sum2; float sum3 = sum * sum2;
float tanhSum = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3); float tanhSum = tanh(cSim.alphaOBC * sum - cSim.betaOBC * sum2 + cSim.gammaOBC * sum3);
...@@ -377,31 +375,6 @@ void kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu ) ...@@ -377,31 +375,6 @@ void kCalculateObcGbsaSoftcoreBornSum( freeEnergyGpuContext freeEnergyGpu )
// printf("kCalculateObcGbsaSoftcoreBornSum\n"); // printf("kCalculateObcGbsaSoftcoreBornSum\n");
gpuContext gpu = freeEnergyGpu->gpuContext; gpuContext gpu = freeEnergyGpu->gpuContext;
#ifdef DEBUG
fprintf( stderr, "kCalculateObcGbsaSoftcoreBornSum cutoff=%15.7e\n", gpu->sim.nonbondedCutoffSqr );
int psize = gpu->sim.paddedNumberOfAtoms;
CUDAStream<float4>* pdE1 = new CUDAStream<float4>( psize, 1, "pdE");
CUDAStream<float4>* pdE2 = new CUDAStream<float4>( psize, 1, "pdE");
float bF;
float bF1;
showWorkUnitsFreeEnergy( freeEnergyGpu, 1 );
for( int ii = 0; ii < psize; ii++ ){
pdE1->_pSysData[ii].x = 0.0f;
pdE1->_pSysData[ii].y = 0.001f;
pdE1->_pSysData[ii].z = 0.001f;
pdE1->_pSysData[ii].w = 0.001f;
pdE2->_pSysData[ii].x = 0.001f;
pdE2->_pSysData[ii].y = 0.001f;
pdE2->_pSysData[ii].z = 0.001f;
pdE2->_pSysData[ii].w = 0.001f;
}
pdE1->Upload();
pdE2->Upload();
#endif
kClearObcGbsaSoftcoreBornSum(gpu); kClearObcGbsaSoftcoreBornSum(gpu);
LAUNCHERROR("kClearBornSum from kCalculateObcGbsaSoftcoreBornSum"); LAUNCHERROR("kClearBornSum from kCalculateObcGbsaSoftcoreBornSum");
...@@ -409,22 +382,13 @@ pdE2->Upload(); ...@@ -409,22 +382,13 @@ pdE2->Upload();
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit); sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else else
kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit); sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
#endif
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
...@@ -437,14 +401,6 @@ pdE2->Upload(); ...@@ -437,14 +401,6 @@ pdE2->Upload();
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
...@@ -452,7 +408,6 @@ pdE2->Upload(); ...@@ -452,7 +408,6 @@ pdE2->Upload();
else else
kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcoreCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#endif
break; break;
...@@ -466,16 +421,6 @@ pdE2->Upload(); ...@@ -466,16 +421,6 @@ pdE2->Upload();
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#ifdef DEBUG
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
else
kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, pdE1->_pDevData, pdE2->_pDevData);
#else
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcorePeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
...@@ -483,7 +428,7 @@ pdE2->Upload(); ...@@ -483,7 +428,7 @@ pdE2->Upload();
kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateObcGbsaSoftcorePeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
#endif
break; break;
default: default:
...@@ -492,23 +437,4 @@ pdE2->Upload(); ...@@ -492,23 +437,4 @@ pdE2->Upload();
} }
LAUNCHERROR("kCalculateObcGbsaSoftcoreBornSum"); LAUNCHERROR("kCalculateObcGbsaSoftcoreBornSum");
#ifdef DEBUG
pdE1->Download();
pdE2->Download();
//gpu->psBornRadii->Download();
//gpu->psObcData->Download();
fprintf( stderr, "bL Obc Cud\n" );
bF = 0.0;
bF1 = 0.0;
for( int ii = 0; ii < gpu->natoms; ii++ ){
bF1 += pdE1->_pSysData[ii].x;
fprintf( stderr, "%4d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
pdE1->_pSysData[ii].x, pdE1->_pSysData[ii].y, pdE1->_pSysData[ii].z, pdE1->_pSysData[ii].w,
pdE2->_pSysData[ii].x, pdE2->_pSysData[ii].y, pdE2->_pSysData[ii].z, pdE2->_pSysData[ii].w );
bF += pdE1->_pSysData[ii].x;
}
fprintf( stderr, "bS Obc Cud %6d %15.7e %15.7e\n", TARGET, bF, bF1 );
#endif
} }
...@@ -41,11 +41,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) ...@@ -41,11 +41,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif #endif
#ifdef DEBUG
void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit, float4* pdE1, float4* pdE2)
#else
void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit) void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUnit)
#endif
{ {
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID; unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
...@@ -135,32 +131,6 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUn ...@@ -135,32 +131,6 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, BornSum_kernel)(unsigned int* workUn
term += 2.0f * ((1.0f / ar.x) - l_ij); term += 2.0f * ((1.0f / ar.x) - l_ij);
} }
apos.w += psA[j].polarScaleData*term; apos.w += psA[j].polarScaleData*term;
#ifdef DEBUG
int jIdx = j;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 1.0f;
pdE2[tjj].x = r;
pdE2[tjj].y = l_ij;
pdE2[tjj].z = rj;
pdE2[tjj].w = 1.0f;
}
/*
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE2[tjj].x = sum;
pdE2[tjj].y = psA[jIdx].polarScaleData;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = -1.0f;
} */
#endif
} }
} }
} }
...@@ -254,37 +224,6 @@ pdE2[tjj].w = -1.0f; ...@@ -254,37 +224,6 @@ pdE2[tjj].w = -1.0f;
} }
apos.w += (scale*term); apos.w += (scale*term);
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 2.0f;
/*
pdE2[tjj].x = r;
pdE2[tjj].y = l_ij;
pdE2[tjj].z = rj;
pdE2[tjj].w = 2.0f;
*/
}
if( (y+jIdx) == TARGET ){
int tjj = i;
/*
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = -2.0f;
*/
pdE2[tjj].x = term;
pdE2[tjj].y = r;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = -2.0f;
}
#endif
} }
float rScaledRadiusI = r + ar.y; float rScaledRadiusI = r + ar.y;
if (psA[tj].r < rScaledRadiusI) if (psA[tj].r < rScaledRadiusI)
...@@ -307,25 +246,6 @@ pdE2[tjj].w = -2.0f; ...@@ -307,25 +246,6 @@ pdE2[tjj].w = -2.0f;
} }
psA[tj].sum += polarScaleDataI*term; psA[tj].sum += polarScaleDataI*term;
#ifdef DEBUG
int jIdx = tj;
if( i == TARGET ){
int tjj = y+jIdx;
pdE1[tjj].x = term;
pdE1[tjj].y = r;
pdE1[tjj].z = ar.x;
pdE1[tjj].w = 3.0f;
}
if( (y+jIdx) == TARGET ){
int tjj = i;
pdE2[tjj].x = term;
pdE2[tjj].y = r;
pdE2[tjj].z = ar.x;
pdE2[tjj].w = -3.0f;
}
#endif
} }
} }
tj = (tj - 1) & (GRID - 1); tj = (tj - 1) & (GRID - 1);
......
...@@ -44,14 +44,10 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -44,14 +44,10 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
unsigned int totalWarps = gridDim.x*blockDim.x/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
//float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[blockDim.x]; float3* tempBuffer = (float3*) &sA[blockDim.x];
#endif #endif
...@@ -124,7 +120,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -124,7 +120,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
// Born Forces term // Born Forces term
float term = 0.125f * (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3 + float term = 0.125f * (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3 +
0.250f * t1 * r2Inverse; 0.250f * t1 * r2Inverse;
term *= psA[j].npScale*nonPolarScaleDataI; term *= psA[j].npScale;
float dE = fb * term; float dE = fb * term;
#if defined USE_CUTOFF #if defined USE_CUTOFF
...@@ -231,7 +227,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -231,7 +227,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
float term = 0.125f * float term = 0.125f *
(1.000f + psA[tj].sr * psA[tj].sr * r2Inverse) * t3J + (1.000f + psA[tj].sr * psA[tj].sr * r2Inverse) * t3J +
0.250f * t1J * r2Inverse; 0.250f * t1J * r2Inverse;
term *= psA[tj].npScale*nonPolarScaleDataI; term *= psA[tj].npScale;
float dE = fb * term; float dE = fb * term;
#if defined USE_CUTOFF #if defined USE_CUTOFF
...@@ -257,7 +253,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -257,7 +253,7 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
term = 0.125f * term = 0.125f *
(1.000f + sr2 * r2Inverse) * t3I + (1.000f + sr2 * r2Inverse) * t3I +
0.250f * t1I * r2Inverse; 0.250f * t1I * r2Inverse;
term *= psA[tj].npScale*nonPolarScaleDataI; term *= nonPolarScaleDataI;
dE = psA[tj].fb * term; dE = psA[tj].fb * term;
float rj = psA[tj].r; float rj = psA[tj].r;
...@@ -293,11 +289,11 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -293,11 +289,11 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
float dx = psA[j].x - apos.x; float dx = psA[j].x - apos.x;
float dy = psA[j].y - apos.y; float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z; float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX; dx -= floor(dx/cSim.periodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY; dy -= floor(dy/cSim.periodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ; dz -= floor(dz/cSim.periodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif #endif
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2); float r = sqrt(r2);
...@@ -327,16 +323,16 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -327,16 +323,16 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
float term = 0.125f * float term = 0.125f *
(1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3J + (1.000f + psA[j].sr * psA[j].sr * r2Inverse) * t3J +
0.250f * t1J * r2Inverse; 0.250f * t1J * r2Inverse;
term *= psA[j].npScale*nonPolarScaleDataI; term *= psA[j].npScale;
float dE = fb * term; float dE = fb * term;
#if defined USE_PERIODIC #if defined USE_PERIODIC
if (a.x >= rScaledRadiusJ || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr) if (a.x >= rScaledRadiusJ || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF #elif defined USE_CUTOFF
if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr) if (a.x >= rScaledRadiusJ || r2 > cSim.nonbondedCutoffSqr)
#else #else
if (a.x >= rScaledRadiusJ) if (a.x >= rScaledRadiusJ)
#endif #endif
{ {
dE = 0.0f; dE = 0.0f;
} }
...@@ -355,17 +351,17 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn ...@@ -355,17 +351,17 @@ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUn
term = 0.125f * term = 0.125f *
(1.000f + sr2 * r2Inverse) * t3I + (1.000f + sr2 * r2Inverse) * t3I +
0.250f * t1I * r2Inverse; 0.250f * t1I * r2Inverse;
term *= psA[j].npScale*nonPolarScaleDataI; term *= nonPolarScaleDataI;
dE = psA[j].fb * term; dE = psA[j].fb * term;
float rj = psA[j].r; float rj = psA[j].r;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
if (rj >= rScaledRadiusI || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr) if (rj >= rScaledRadiusI || i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF #elif defined USE_CUTOFF
if (rj >= rScaledRadiusI || r2 > cSim.nonbondedCutoffSqr) if (rj >= rScaledRadiusI || r2 > cSim.nonbondedCutoffSqr)
#else #else
if (rj >= rScaledRadiusI) if (rj >= rScaledRadiusI)
#endif #endif
{ {
dE = 0.0f; dE = 0.0f;
} }
......
...@@ -15,14 +15,12 @@ SET( INCLUDE_SERIALIZATION FALSE ) ...@@ -15,14 +15,12 @@ SET( INCLUDE_SERIALIZATION FALSE )
#SET( INCLUDE_SERIALIZATION TRUE ) #SET( INCLUDE_SERIALIZATION TRUE )
SET( SHARED_OPENMM_TARGET OpenMMFreeEnergy) SET( SHARED_OPENMM_TARGET OpenMMFreeEnergy)
SET( STATIC_OPENMM_TARGET OpenMMFreeEnergy_static)
SET( SHARED_CUDA_TARGET OpenMMCuda) SET( SHARED_CUDA_TARGET OpenMMCuda)
SET( STATIC_CUDA_TARGET OpenMMCuda_static)
IF( INCLUDE_SERIALIZATION ) IF( INCLUDE_SERIALIZATION )
INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include) INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include)
SET( SHARED_OPENMM_SERIALIZATION OpenMMSerialization ) SET( SHARED_OPENMM_SERIALIZATION "OpenMMSerialization" )
SET( SHARED_FREE_ENERGY_SERIALIZATION FreeEnergySerialization ) SET( SHARED_FREE_ENERGY_SERIALIZATION "FreeEnergySerialization" )
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...@@ -32,8 +30,6 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ...@@ -32,8 +30,6 @@ IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(SHARED_OPENMM_SERIALIZATION ${SHARED_OPENMM_SERIALIZATION}_d) SET(SHARED_OPENMM_SERIALIZATION ${SHARED_OPENMM_SERIALIZATION}_d)
SET(SHARED_FREE_ENERGY_SERIALIZATION ${SHARED_FREE_ENERGY_SERIALIZATION}_d) SET(SHARED_FREE_ENERGY_SERIALIZATION ${SHARED_FREE_ENERGY_SERIALIZATION}_d)
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
SET(STATIC_CUDA_TARGET ${STATIC_CUDA_TARGET}_d)
SET(STATIC_OPENMM_TARGET ${STATIC_OPENMM_TARGET}_d)
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
# Automatically create tests using files named "Test*.cpp" # Automatically create tests using files named "Test*.cpp"
...@@ -54,13 +50,52 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -54,13 +50,52 @@ FOREACH(TEST_PROG ${TEST_PROGS})
SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE") SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE")
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
IF( ${TEST_ROOT} STREQUAL "TestCudaOBCSoftcoreForce" ) IF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce2" )
SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
ENDIF( ${TEST_ROOT} STREQUAL "TestCudaOBCSoftcoreForce" ) # serialize
SET(DEFINE_STRING "-DTEST_PLATFORM=0 ")
IF( INCLUDE_SERIALIZATION )
SET(DEFINE_STRING "${DEFINE_STRING} -DOPENMM_SERIALIZE ")
SET(TARGET_LINK_LIBRARIES_STRING "${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION}")
ELSE( INCLUDE_SERIALIZATION )
SET(TARGET_LINK_LIBRARIES_STRING "${SHARED_TARGET}")
ENDIF( INCLUDE_SERIALIZATION )
# obc
SET(OBC_DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=1")
SET(OBC_TEST "TestCudaGBSAOBCSoftcoreForce2")
CUDA_ADD_EXECUTABLE(${OBC_TEST} ${TEST_PROG})
IF( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${OBC_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
ELSE( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${OBC_TEST} ${SHARED_TARGET})
ENDIF( INCLUDE_SERIALIZATION )
SET_TARGET_PROPERTIES(${OBC_TEST} PROPERTIES COMPILE_FLAGS ${OBC_DEFINE_STRING} )
ADD_TEST(${OBC_TEST} ${EXECUTABLE_OUTPUT_PATH}/${OBC_TEST})
# nonbond
SET(NONBOND_DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=0")
SET(NONBOND_TEST "TestCudaNonbondSoftcoreForce2")
CUDA_ADD_EXECUTABLE(${NONBOND_TEST} ${TEST_PROG})
IF( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET} ${SHARED_OPENMM_SERIALIZATION} )
ELSE( INCLUDE_SERIALIZATION )
TARGET_LINK_LIBRARIES(${NONBOND_TEST} ${SHARED_TARGET})
ENDIF( INCLUDE_SERIALIZATION )
SET_TARGET_PROPERTIES(${NONBOND_TEST} PROPERTIES COMPILE_FLAGS ${NONBOND_DEFINE_STRING} )
ADD_TEST(${NONBOND_TEST} ${EXECUTABLE_OUTPUT_PATH}/${NONBOND_TEST})
# gbvi
IF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce" )
SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=2") SET(DEFINE_STRING "${DEFINE_STRING} -DIMPLICIT_SOLVENT=2")
ENDIF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce" ) SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} )
ENDIF( ${TEST_ROOT} STREQUAL "TestCudaGBVISoftcoreForce2" )
#MESSAGE( "${TEST_ROOT} ${DEFINE_STRING}" ) #MESSAGE( "${TEST_ROOT} ${DEFINE_STRING}" )
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} ) SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS ${DEFINE_STRING} )
......
...@@ -221,7 +221,7 @@ void CpuObcSoftcore::computeBornRadii( const vector<RealVec>& atomCoordinates, ...@@ -221,7 +221,7 @@ void CpuObcSoftcore::computeBornRadii( const vector<RealVec>& atomCoordinates,
// OBC-specific code (Eqs. 6-8 in paper) // OBC-specific code (Eqs. 6-8 in paper)
sum *= nonPolarScaleFactors[atomI]*half*offsetRadiusI; sum *= half*offsetRadiusI;
RealOpenMM sum2 = sum*sum; RealOpenMM sum2 = sum*sum;
RealOpenMM sum3 = sum*sum2; RealOpenMM sum3 = sum*sum2;
RealOpenMM tanhSum = TANH( alphaObc*sum - betaObc*sum2 + gammaObc*sum3 ); RealOpenMM tanhSum = TANH( alphaObc*sum - betaObc*sum2 + gammaObc*sum3 );
...@@ -486,7 +486,7 @@ RealOpenMM CpuObcSoftcore::computeBornEnergyForces( vector<RealVec>& atomCoordin ...@@ -486,7 +486,7 @@ RealOpenMM CpuObcSoftcore::computeBornEnergyForces( vector<RealVec>& atomCoordin
RealOpenMM r2Inverse = rInverse*rInverse; RealOpenMM r2Inverse = rInverse*rInverse;
RealOpenMM t3 = eighth*(one + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + fourth*LN( u_ij/l_ij )*r2Inverse; RealOpenMM t3 = eighth*(one + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + fourth*LN( u_ij/l_ij )*r2Inverse;
t3 *= nonPolarScaleFactors[atomI]*nonPolarScaleFactors[atomJ]; t3 *= nonPolarScaleFactors[atomJ];
RealOpenMM de = bornForces[atomI]*t3*rInverse; RealOpenMM de = bornForces[atomI]*t3*rInverse;
......
...@@ -77,7 +77,7 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -77,7 +77,7 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric()); custom->addGlobalParameter("solventDielectric", obc->getSolventDielectric());
custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric()); custom->addGlobalParameter("soluteDielectric", obc->getSoluteDielectric());
custom->addComputedValue("I", "lambda1*lambda2*step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);" custom->addComputedValue("I", "lambda2*step(r+sr2-or1)*0.5*(1/L-1/U+0.25*(1/U^2-1/L^2)*(r-sr2*sr2/r)+0.5*log(L/U)/r+C);"
"U=r+sr2;" "U=r+sr2;"
"C=2*(1/or1-1/L)*step(sr2-r-or1);" "C=2*(1/or1-1/L)*step(sr2-r-or1);"
"L=max(or1, D);" "L=max(or1, D);"
...@@ -86,8 +86,8 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -86,8 +86,8 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
"or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions); "or1 = radius1-0.009; or2 = radius2-0.009", CustomGBForce::ParticlePairNoExclusions);
custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);" custom->addComputedValue("B", "1/(1/or-tanh(1*psi-0.8*psi^2+4.85*psi^3)/radius);"
"psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle); "psi=I*or; or=radius-0.009", CustomGBForce::SingleParticle);
custom->addEnergyTerm("lambda*28.3919551*(radius+0.14)^2*(radius/B)^6-lambda*lambda*0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle); custom->addEnergyTerm("lambda*28.3919551*(radius+0.14)^2*(radius/B)^6-0.5*138.935485*(1/soluteDielectric-1/solventDielectric)*q^2/B", CustomGBForce::SingleParticle);
custom->addEnergyTerm("-138.935485*lambda1*lambda2*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;" custom->addEnergyTerm("-138.935485*(1/soluteDielectric-1/solventDielectric)*q1*q2/f;"
"f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions); "f=sqrt(r^2+B1*B2*exp(-r^2/(4*B1*B2)))", CustomGBForce::ParticlePairNoExclusions);
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
...@@ -103,14 +103,14 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -103,14 +103,14 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
obc->addParticle( charge*lambda1, 0.2, 0.5, lambda1); obc->addParticle( charge*lambda1, 0.2, 0.5, lambda1);
obc->addParticle(-charge*lambda1, 0.1, 0.5, lambda1); obc->addParticle(-charge*lambda1, 0.1, 0.5, lambda1);
params[0] = charge; params[0] = charge*lambda1;
params[1] = 0.2; params[1] = 0.2;
params[2] = 0.5; params[2] = 0.5;
params[3] = lambda1; params[3] = lambda1;
custom->addParticle(params); custom->addParticle(params);
params[0] = -charge; params[0] = -charge*lambda1;
params[1] = 0.1; params[1] = 0.1;
custom->addParticle(params); custom->addParticle(params);
...@@ -119,13 +119,13 @@ void testOBCSoftcore( double lambda1, double lambda2 ){ ...@@ -119,13 +119,13 @@ void testOBCSoftcore( double lambda1, double lambda2 ){
obc->addParticle( charge*lambda2, 0.2, 0.8, lambda2); obc->addParticle( charge*lambda2, 0.2, 0.8, lambda2);
obc->addParticle(-charge*lambda2, 0.1, 0.8, lambda2); obc->addParticle(-charge*lambda2, 0.1, 0.8, lambda2);
params[0] = charge; params[0] = charge*lambda2;
params[1] = 0.2; params[1] = 0.2;
params[2] = 0.8; params[2] = 0.8;
params[3] = lambda2; params[3] = lambda2;
custom->addParticle(params); custom->addParticle(params);
params[0] = -charge; params[0] = -charge*lambda2;
params[1] = 0.1; params[1] = 0.1;
custom->addParticle(params); custom->addParticle(params);
} }
......
...@@ -59,10 +59,7 @@ ...@@ -59,10 +59,7 @@
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <iostream>
#include <cstdio> #include <cstdio>
#include <vector>
#include <typeinfo> #include <typeinfo>
extern "C" void registerFreeEnergyCudaKernelFactories(); extern "C" void registerFreeEnergyCudaKernelFactories();
...@@ -1322,29 +1319,27 @@ static NonbondedForce* copyNonbondedForce( const NonbondedForce& nonbondedForce ...@@ -1322,29 +1319,27 @@ static NonbondedForce* copyNonbondedForce( const NonbondedForce& nonbondedForce
* *
*/ */
static System* copySystem( const System& inputSystem ){ static void copySystem( const System& inputSystem, System& systemCopy ){
System* systemCopy = new System();
for( unsigned int ii = 0; ii < inputSystem.getNumParticles(); ii++ ){ for( unsigned int ii = 0; ii < inputSystem.getNumParticles(); ii++ ){
systemCopy->addParticle( inputSystem.getParticleMass( static_cast<int>(ii) ) ); systemCopy.addParticle( inputSystem.getParticleMass( static_cast<int>(ii) ) );
} }
Vec3 a; Vec3 a;
Vec3 b; Vec3 b;
Vec3 c; Vec3 c;
inputSystem.getDefaultPeriodicBoxVectors( a, b, c ); inputSystem.getDefaultPeriodicBoxVectors( a, b, c );
systemCopy->setDefaultPeriodicBoxVectors( a, b, c ); systemCopy.setDefaultPeriodicBoxVectors( a, b, c );
for( unsigned int ii = 0; ii < inputSystem.getNumConstraints(); ii++ ){ for( unsigned int ii = 0; ii < inputSystem.getNumConstraints(); ii++ ){
int index; int index;
int particle1, particle2; int particle1, particle2;
double distance; double distance;
inputSystem.getConstraintParameters( ii, particle1, particle2, distance); inputSystem.getConstraintParameters( ii, particle1, particle2, distance);
systemCopy->addConstraint( particle1, particle2, distance); systemCopy.addConstraint( particle1, particle2, distance);
} }
return systemCopy; return;
} }
/** /**
...@@ -1809,8 +1804,9 @@ void runSystemComparisonTest( System& system1, System& system2, ...@@ -1809,8 +1804,9 @@ void runSystemComparisonTest( System& system1, System& system2,
(void) fprintf( log, "System1: particles=%d forces=%d System2: particles=%d forces=%d\n", (void) fprintf( log, "System1: particles=%d forces=%d System2: particles=%d forces=%d\n",
system1.getNumParticles(), system1.getNumForces(), system1.getNumParticles(), system1.getNumForces(),
system2.getNumParticles(), system2.getNumForces() ); system2.getNumParticles(), system2.getNumForces() );
(void) fprintf( log, "Positions=%u\n", (void) fprintf( log, "Positions=%u\n", static_cast<unsigned int>(positions.size()) );
static_cast<unsigned int>(positions.size()) ); (void) fprintf( log, "Platform1=%s Platform2=%s\n", platform1.c_str(), platform2.c_str() );
(void) fprintf( log, "relativeTolerance=%8.2e applyAssert=%d\n", relativeTolerance, applyAssert );
MapStringInt stringForceVector1; MapStringInt stringForceVector1;
MapStringInt stringForceVector2; MapStringInt stringForceVector2;
...@@ -1836,7 +1832,7 @@ void runSystemComparisonTest( System& system1, System& system2, ...@@ -1836,7 +1832,7 @@ void runSystemComparisonTest( System& system1, System& system2,
if( system1.getNumParticles() != static_cast<int>(positions.size()) ){ if( system1.getNumParticles() != static_cast<int>(positions.size()) ){
std::stringstream msg; std::stringstream msg;
msg << "Number of partciles for system des not equal size of position array: " << system1.getNumParticles() << " != " << positions.size(); msg << "Number of particles for system does not equal size of position array: " << system1.getNumParticles() << " != " << positions.size();
throw OpenMMException( msg.str() ); throw OpenMMException( msg.str() );
} }
...@@ -1844,7 +1840,7 @@ void runSystemComparisonTest( System& system1, System& system2, ...@@ -1844,7 +1840,7 @@ void runSystemComparisonTest( System& system1, System& system2,
context1.setPositions(positions); context1.setPositions(positions);
State state1 = context1.getState(State::Forces | State::Energy); State state1 = context1.getState(State::Forces | State::Energy);
Context context2( system2, integrator2, Platform::getPlatformByName( "Reference")); Context context2( system2, integrator2, Platform::getPlatformByName( platform2 ));
context2.setPositions(positions); context2.setPositions(positions);
State state2 = context2.getState(State::Forces | State::Energy); State state2 = context2.getState(State::Forces | State::Energy);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment