".github/vscode:/vscode.git/clone" did not exist on "1c679302224971b1da31837c942d4baa5b14915e"
Commit b9d12c46 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Clean code and several bug fixes to GB/VI and Obc for nonbonded methods w/ cutoffs

parent ce74c6ae
......@@ -133,3 +133,7 @@ extern void SetCustomExternalGlobalParams(const std::vector<float>& paramValues)
extern void SetCustomNonbondedForceExpression(const Expression<256>& expression);
extern void SetCustomNonbondedEnergyExpression(const Expression<256>& expression);
extern void SetCustomNonbondedGlobalParams(const std::vector<float>& paramValues);
extern void kPrintGBVI( gpuContext gpu, std::string callId, int call, FILE* log);
extern void kPrintObc( gpuContext gpu, std::string callId, int call, FILE* log);
......@@ -64,16 +64,6 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
status = cudaMemcpyFromSymbol(&gpu->sim, cSim, sizeof(cudaGmxSimulation));
RTERROR(status, "cudaMemcpyFromSymbol: SetSim copy from cSim failed");
}
texture<float, 1, cudaReadModeElementType> tabulatedErfcRef;
static __device__ float fastErfc(float r)
{
float normalized = cSim.tabulatedErfcScale*r;
int index = (int) normalized;
float fract2 = normalized-index;
float fract1 = 1.0f-fract2;
return fract1*tex1Dfetch(tabulatedErfcRef, index) + fract2*tex1Dfetch(tabulatedErfcRef, index+1);
}
// Include versions of the kernel for N^2 calculations.
......@@ -134,9 +124,11 @@ extern void kCalculatePME(gpuContext gpu);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bRecalculateBornRadii)
{
if( gpu->bIncludeGBVI ){
......@@ -148,16 +140,19 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaN2ByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
else
kCalculateCDLJObcGbsaN2Forces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaN2Forces1");
break;
case CUTOFF:
kFindBlockBoundsCutoff_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsCutoff");
kFindBlocksWithInteractionsCutoff_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
......@@ -165,20 +160,31 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksCutoff_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
if( gpu->bIncludeGBVI ){
kCalculateGBVIBornSum(gpu);
kReduceGBVIBornSum(gpu);
} else {
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaCutoffForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaCutoffForces1");
break;
case PERIODIC:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
......@@ -186,52 +192,28 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
if( gpu->bIncludeGBVI ){
kCalculateGBVIBornSum(gpu);
kReduceGBVIBornSum(gpu);
} else {
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
break;
case EWALD:
case PARTICLE_MESH_EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaBindTexture(NULL, &tabulatedErfcRef, gpu->psTabulatedErfc->_pDevData, &channelDesc, gpu->psTabulatedErfc->_length*sizeof(float));
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
if (gpu->sim.nonbondedMethod == EWALD)
{
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
}
else
kCalculatePME(gpu);
}
}
......@@ -237,71 +237,48 @@ __global__ void kReduceGBVIBornSum_kernel()
void kReduceGBVIBornSum(gpuContext gpu)
{
//printf("kReduceGBVIBornSum\n");
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
gpu->psGBVIData->Download();
gpu->psBornSum->Download();
gpu->psPosq4->Download();
(void) fprintf( stderr, "\nkReduceGBVIBornSum: Post BornSum %s Born radii & params\n",
(gpu->bIncludeGBVI ? "GBVI" : "Obc") );
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
ii,
gpu->psBornSum->_pSysStream[0][ii],
gpu->psGBVIData->_pSysStream[0][ii].x,
gpu->psGBVIData->_pSysStream[0][ii].y,
gpu->psGBVIData->_pSysStream[0][ii].z,
gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w
);
}
#endif
#undef GBVI_DEBUG
kReduceGBVIBornSum_kernel<<<gpu->sim.blocks, 384>>>();
gpu->bRecalculateBornRadii = false;
LAUNCHERROR("kReduceGBVIBornSum");
}
void kPrintGBVI( gpuContext gpu, std::string callId, int call, FILE* log)
{
gpu->psGBVIData->Download();
gpu->psBornRadii->Download();
gpu->psBornForce->Download();
gpu->psPosq4->Download();
gpu->psSigEps2->Download();
(void) fprintf( log, "kPrintGBVI Cuda comp bR bF prm sigeps2\n" );
(void) fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n", gpu->bOutputBufferPerWarp,
gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits, gpu->psWorkUnit->_pSysStream[0][0],
sizeof(Atom)*gpu->sim.nonbond_threads_per_block );
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( log, "%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e \n", ii,
gpu->psBornRadii->_pSysData[ii],
gpu->psBornForce->_pSysData[ii],
gpu->psGBVIData->_pSysData[ii].x,
gpu->psGBVIData->_pSysData[ii].y,
gpu->psGBVIData->_pSysData[ii].z,
gpu->psGBVIData->_pSysData[ii].w,
gpu->psSigEps2->_pSysData[ii].x,
gpu->psSigEps2->_pSysData[ii].y );
}
}
void kCalculateGBVIBornSum(gpuContext gpu)
{
//printf("kCalculateGBVIBornSum\n");
//size_t numWithInteractions;
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
#define GBVI 0
#if GBVI == 1
int maxPrint = 10;
gpu->psWorkUnit->Download();
fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n", gpu->bOutputBufferPerWarp,
gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits, gpu->psWorkUnit->_pSysStream[0][0],
sizeof(Atom)*gpu->sim.nonbond_threads_per_block );
gpu->psGBVIData->Download();
gpu->psBornSum->Download();
gpu->psPosq4->Download();
(void) fprintf( stderr, "\nkCalculateGBVIBornSum: pre BornSum %s Born radii & params\n",
(gpu->bIncludeGBVI ? "GBVI" : "Obc") );
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]\n",
ii,
gpu->psBornSum->_pSysStream[0][ii],
gpu->psGBVIData->_pSysStream[0][ii].x,
gpu->psGBVIData->_pSysStream[0][ii].y,
gpu->psGBVIData->_pSysStream[0][ii].z,
gpu->psPosq4->_pSysStream[0][ii].x, gpu->psPosq4->_pSysStream[0][ii].y,
gpu->psPosq4->_pSysStream[0][ii].z, gpu->psPosq4->_pSysStream[0][ii].w
);
if( (ii == maxPrint) && ( ii < (gpu->natoms - maxPrint)) ){
ii = gpu->natoms - maxPrint;
}
}
#endif
#undef GBVI
if (gpu->bOutputBufferPerWarp){
kCalculateGBVIN2ByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
......@@ -310,6 +287,7 @@ fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
}
break;
case CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVICutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
......@@ -318,6 +296,7 @@ fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=
kCalculateGBVICutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
case PERIODIC:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVIPeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
......@@ -326,6 +305,7 @@ fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=
kCalculateGBVIPeriodicBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
}
LAUNCHERROR("kCalculateGBVIBornSum");
}
......@@ -167,76 +167,41 @@ __global__ void kCalculateGBVIForces2a_kernel()
void kCalculateGBVIForces2(gpuContext gpu)
{
//printf("kCalculateGBVIForces2\n");
size_t numWithInteractions;
#if 0
kClearForces(gpu);
(void) fprintf( stderr, "\nkCalculateGBVIForces2: cleared force prior loop2\n" ); (void) fflush( stderr );
kCalculateGBVIForces2a_kernel<<<gpu->sim.blocks, 384>>>();
(void) fprintf( stderr, "\ncalled kCalculateGBVIForces2a\n" ); (void) fflush( stderr );
return;
#endif
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateGBVIN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits);
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit );
else
kCalculateGBVIN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit, gpu->sim.workUnits);
//(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces warp=%u\n", gpu->bOutputBufferPerWarp ); (void) fflush( stderr );
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces:\n" ); (void) fflush( stderr );
gpu->psBornForce->Download();
gpu->psForce4->Download();
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e] Fb[%14.6e %14.6e %14.6e]\n",
ii,
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psForce4->_pSysStream[1][ii].x,
gpu->psForce4->_pSysStream[1][ii].y,
gpu->psForce4->_pSysStream[1][ii].z
);
}
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms*2; ii++ ){
(void) fprintf( stderr, "%d bF=%14.6e Fa[%14.6e %14.6e %14.6e %14.6e]\n",
ii,
gpu->psBornForce->_pSysStream[0][ii],
gpu->psForce4->_pSysStream[0][ii].x,
gpu->psForce4->_pSysStream[0][ii].y,
gpu->psForce4->_pSysStream[0][ii].z,
gpu->psForce4->_pSysStream[0][ii].w
);
}
#endif
#undef GBVI_DEBUG
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit );
break;
case CUTOFF:
numWithInteractions = gpu->psInteractionCount->_pSysData[0];
if (gpu->bOutputBufferPerWarp)
kCalculateGBVICutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
else
kCalculateGBVICutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
break;
case PERIODIC:
numWithInteractions = gpu->psInteractionCount->_pSysData[0];
if (gpu->bOutputBufferPerWarp)
kCalculateGBVIPeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
else
kCalculateGBVIPeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit, numWithInteractions);
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit );
break;
}
LAUNCHERROR("kCalculateGBVIForces2");
//kPrintGBVI( gpu, "kCalculateGBVIForces2", 0, stderr);
}
......@@ -45,15 +45,16 @@ __launch_bounds__(GT2XX_BORNFORCE2_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif
METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit, unsigned int numWorkUnits)
METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
#endif
unsigned int lasty = -0xFFFFFFFF;
......
......@@ -35,9 +35,6 @@ using namespace std;
#include "gputypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct Atom {
float x;
float y;
......@@ -105,19 +102,17 @@ void kReduceObcGbsaBornSum_kernel()
while (pos < cSim.atoms)
{
float sum = 0.0f;
float* pSt = cSim.pBornSum + pos;
float sum = 0.0f;
float* pSt = cSim.pBornSum + pos;
float2 atom = cSim.pObcData[pos];
// Get summed Born data
for (int i = 0; i < cSim.nonbondOutputBuffers; i++)
{
sum += *pSt;
// printf("%4d %4d A: %9.4f\n", pos, i, *pSt);
pSt += cSim.stride;
}
// Now calculate Born radius and OBC term.
sum *= 0.5f * atom.x;
float sum2 = sum * sum;
......@@ -127,20 +122,49 @@ void kReduceObcGbsaBornSum_kernel()
float bornRadius = 1.0f / (1.0f / atom.x - tanhSum / nonOffsetRadii);
float obcChain = atom.x * (cSim.alphaOBC - 2.0f * cSim.betaOBC * sum + 3.0f * cSim.gammaOBC * sum2);
obcChain = (1.0f - tanhSum * tanhSum) * obcChain / nonOffsetRadii;
cSim.pBornRadii[pos] = bornRadius;
cSim.pObcChain[pos] = obcChain;
pos += gridDim.x * blockDim.x;
cSim.pBornRadii[pos] = bornRadius;
cSim.pObcChain[pos] = obcChain;
pos += gridDim.x * blockDim.x;
}
}
void OPENMMCUDA_EXPORT kReduceObcGbsaBornSum(gpuContext gpu)
{
// printf("kReduceObcGbsaBornSum\n");
kReduceObcGbsaBornSum_kernel<<<gpu->sim.blocks, 384>>>();
gpu->bRecalculateBornRadii = false;
LAUNCHERROR("kReduceObcGbsaBornSum");
}
void kPrintObc( gpuContext gpu, std::string callId, int call, FILE* log)
{
gpu->psObcData->Download();
gpu->psBornRadii->Download();
gpu->psObcChain->Download();
gpu->psBornForce->Download();
gpu->psPosq4->Download();
gpu->psSigEps2->Download();
(void) fprintf( log, "kPrintObc Cuda bCh bR bF prm[2] sigeps[2]\n" );
(void) fprintf( stderr, "bOutputWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n", gpu->bOutputBufferPerWarp,
gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits, gpu->psWorkUnit->_pSysStream[0][0],
sizeof(Atom)*gpu->sim.nonbond_threads_per_block );
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( log, "%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e \n", ii,
gpu->psObcChain->_pSysData[ii],
gpu->psBornRadii->_pSysData[ii],
gpu->psBornForce->_pSysData[ii],
gpu->psObcData->_pSysData[ii].x,
gpu->psObcData->_pSysData[ii].y,
gpu->psSigEps2->_pSysData[ii].x,
gpu->psSigEps2->_pSysData[ii].y );
}
}
void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
{
// printf("kCalculateObcgbsaBornSum\n");
......@@ -155,7 +179,9 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
kCalculateObcGbsaN2BornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(Atom)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pWorkUnit);
break;
case CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaCutoffByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
......@@ -163,7 +189,9 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
kCalculateObcGbsaCutoffBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
break;
case PERIODIC:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaPeriodicByWarpBornSum_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
......@@ -172,5 +200,6 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
break;
}
LAUNCHERROR("kCalculateBornSum");
}
......@@ -41,15 +41,12 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
// unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
#endif
#ifdef USE_CUTOFF
float* tempBuffer = (float*) &sA[cSim.nonbond_threads_per_block];
#endif
......@@ -69,8 +66,8 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
Atom* psA = &sA[tbx];
unsigned int tj = tgx;
Atom* psA = &sA[tbx];
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
......@@ -91,15 +88,15 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#if defined USE_PERIODIC
#if defined USE_CUTOFF
if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && x+j < cSim.atoms )
#endif
{
r = sqrt(r2);
......@@ -118,7 +115,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
(0.50f * rInverse * ratio) +
(0.25f * psA[j].sr * psA[j].sr * rInverse) *
(l_ij2 - u_ij2);
float rj = psA[j].r;
float rj = psA[j].sr;
if (ar.x < (rj - r))
{
apos.w += 2.0f * ((1.0f / ar.x) - l_ij);
......@@ -170,15 +167,15 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
dy = psA[tj].y - apos.y;
dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
#ifdef USE_CUTOFF
if (i < cSim.atoms && y+tj < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && y+tj < cSim.atoms)
#endif
{
r = sqrt(r2);
......@@ -243,15 +240,15 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
#ifdef USE_CUTOFF
if (i < cSim.atoms && y+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && y+j < cSim.atoms)
#endif
{
r = sqrt(r2);
......
......@@ -130,4 +130,6 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaForces2(gpuContext gpu)
break;
}
LAUNCHERROR("kCalculateObcGbsaForces2");
//kPrintObc( gpu, "Post kCalculateObcGbsaForces2", 0, stderr );
}
......@@ -223,7 +223,7 @@ void kReduceBornSumAndForces_kernel()
void kReduceBornSumAndForces(gpuContext gpu)
{
//printf("kReduceBornSumAndForces\n");
fprintf( stderr, "kReduceBornSumAndForces\n");
kReduceBornSumAndForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
LAUNCHERROR("kReduceBornSumAndForces");
}
......@@ -428,8 +428,6 @@ void kReduceGBVIBornForces_kernel()
void kReduceObcGbsaBornForces(gpuContext gpu)
{
//fprintf( stderr, "kReduceObcGbsaBornForces %u %u %u %u %u\n", gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block, GF1XX_THREADS_PER_BLOCK,
// GT2XX_THREADS_PER_BLOCK, G8X_THREADS_PER_BLOCK ); fflush( stderr );
if( gpu->bIncludeGBSA ){
kReduceObcGbsaBornForces_kernel<<<gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block>>>();
LAUNCHERROR("kReduceObcGbsaBornForces");
......
......@@ -254,7 +254,7 @@ void CpuGBVI::computeBornRadii( const vector<RealVec>& atomCoordinates, RealOpen
}
RealOpenMM atomicRadius3 = POW( radiusI, minusThree );
if( _gbviParameters->getBornRadiusScalingMethod() == GBVIParameters::QuinticSpline ){
if( _gbviParameters->getBornRadiusScalingMethod() != GBVIParameters::QuinticSpline ){
sum = atomicRadius3 - sum;
bornRadii[atomI] = POW( sum, minusOneThird );
switchDeriviatives[atomI] = one;
......
......@@ -495,5 +495,69 @@ RealOpenMM CpuObc::computeBornEnergyForces( const vector<RealVec>& atomCoordinat
}
//printObc( atomCoordinates, partialCharges, bornRadii, bornForces, inputForces, "Obc Post loop2", stderr );
return obcEnergy;
}
/**---------------------------------------------------------------------------------------
Print Obc parameters, radii, forces, ...
@param atomCoordinates atomic coordinates
@param partialCharges partial charges
@param bornRadii Born radii (may be empty)
@param bornForces Born forces (may be empty)
@param forces forces (may be empty)
@param idString id string (who is calling)
@param log log file
--------------------------------------------------------------------------------------- */
void CpuObc::printObc( const std::vector<OpenMM::RealVec>& atomCoordinates,
const RealOpenMMVector& partialCharges,
const RealOpenMMVector& bornRadii,
const RealOpenMMVector& bornForces,
const std::vector<OpenMM::RealVec>& forces,
const std::string& idString, FILE* log ){
// ---------------------------------------------------------------------------------------
const ObcParameters* obcParameters = getObcParameters();
const int numberOfAtoms = obcParameters->getNumberOfAtoms();
const RealOpenMMVector& atomicRadii = obcParameters->getAtomicRadii();
const RealOpenMM preFactor = 2.0*obcParameters->getElectricConstant();
const RealOpenMMVector& obcChain = getObcChain();
const RealOpenMMVector& scaledRadiusFactor = obcParameters->getScaledRadiusFactors();
const RealOpenMM alphaObc = obcParameters->getAlphaObc();
const RealOpenMM betaObc = obcParameters->getBetaObc();
const RealOpenMM gammaObc = obcParameters->getGammaObc();
// ---------------------------------------------------------------------------------------
(void) fprintf( log, "Reference Obc %s atoms=%d\n", idString.c_str(), numberOfAtoms );
(void) fprintf( log, " preFactor %15.7e\n", preFactor );
(void) fprintf( log, " alpha %15.7e\n", alphaObc);
(void) fprintf( log, " beta %15.7e\n", betaObc);
(void) fprintf( log, " gamma %15.7e\n", gammaObc );
for( int atomI = 0; atomI < numberOfAtoms; atomI++ ){
(void) fprintf( log, "%6d r=%15.7e q=%6.3f", atomI,
atomicRadii[atomI], partialCharges[atomI] );
if( obcChain.size() > atomI ){
(void) fprintf( log, " bChn=%15.7e", obcChain[atomI] );
}
if( bornRadii.size() > atomI ){
(void) fprintf( log, " bR=%15.7e", bornRadii[atomI] );
}
if( bornForces.size() > atomI ){
(void) fprintf( log, " bF=%15.7e", bornForces[atomI] );
}
(void) fprintf( log, "\n" );
}
return;
}
......@@ -158,6 +158,27 @@ class CpuObc {
RealOpenMM computeBornEnergyForces( const std::vector<OpenMM::RealVec>& atomCoordinates,
const RealOpenMMVector& partialCharges, std::vector<OpenMM::RealVec>& forces );
/**---------------------------------------------------------------------------------------
Print Obc parameters, radii, forces, ...
@param atomCoordinates atomic coordinates
@param partialCharges partial charges
@param bornRadii Born radii (may be empty)
@param bornForces Born forces (may be empty)
@param forces forces (may be empty)
@param idString id string (who is calling)
@param log log file
--------------------------------------------------------------------------------------- */
void printObc( const std::vector<OpenMM::RealVec>& atomCoordinates,
const RealOpenMMVector& partialCharges,
const RealOpenMMVector& bornRadii,
const RealOpenMMVector& bornForces,
const std::vector<OpenMM::RealVec>& forces,
const std::string& idString, FILE* log );
};
// ---------------------------------------------------------------------------------------
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment