"platforms/cuda/vscode:/vscode.git/clone" did not exist on "af57bf9852d7d7de958a2c0db75213059a79e35f"
Commit d8d1e0f3 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Cleanup GBVI code

parent 65f48926
......@@ -247,6 +247,7 @@ void kPrintGBVI( gpuContext gpu, std::string callId, int call, FILE* log)
gpu->psGBVIData->Download();
gpu->psBornRadii->Download();
gpu->psGBVISwitchDerivative->Download();
gpu->psBornForce->Download();
gpu->psPosq4->Download();
gpu->psSigEps2->Download();
......@@ -255,11 +256,12 @@ void kPrintGBVI( gpuContext gpu, std::string callId, int call, FILE* log)
(void) fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u\n", gpu->bOutputBufferPerWarp,
gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, gpu->sim.workUnits, gpu->psWorkUnit->_pSysStream[0][0],
sizeof(Atom)*gpu->sim.nonbond_threads_per_block );
(void) fprintf( stderr, "bR bF swd r scR ...\n" );
for( int ii = 0; ii < gpu->sim.paddedNumberOfAtoms; ii++ ){
(void) fprintf( log, "%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e \n", ii,
(void) fprintf( log, "%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e\n", ii,
gpu->psBornRadii->_pSysData[ii],
gpu->psBornForce->_pSysData[ii],
gpu->psGBVISwitchDerivative->_pSysData[ii],
gpu->psGBVIData->_pSysData[ii].x,
gpu->psGBVIData->_pSysData[ii].y,
gpu->psGBVIData->_pSysData[ii].z,
......
......@@ -49,16 +49,14 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
{
extern __shared__ volatile Atom sA[];
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
// int end = workUnits / gridDim.x;
// int pos = end - (threadIdx.x >> GRIDBITS) - 1;
unsigned int totalWarps = cSim.nonbond_blocks*cSim.nonbond_threads_per_block/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF
volatile float* tempBuffer = (volatile float*) &sA[cSim.nonbond_threads_per_block];
volatile float* tempBuffer = (volatile float*) &sA[cSim.nonbond_threads_per_block];
#endif
while ( pos < end )
......@@ -76,10 +74,10 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
// forces tgx into interval [0,31]
// forces tbx 0
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
volatile Atom* psA = &sA[tbx];
unsigned int tgx = threadIdx.x & (GRID - 1);
unsigned int tbx = threadIdx.x - tgx;
unsigned int tj = tgx;
volatile Atom* psA = &sA[tbx];
if (x == y) // Handle diagonals uniquely at 50% efficiency
{
......@@ -94,28 +92,25 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
sA[threadIdx.x].sr = ar.y;
apos.w = 0.0f;
for (unsigned int j = 0; j < GRID; j++)
for (unsigned int j = 0; j < GRID; j++)
{
dx = psA[j].x - apos.x;
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#if defined USE_PERIODIC
if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
r2 = dx * dx + dy * dy + dz * dz;
#if defined USE_CUTOFF
if (i < cSim.atoms && x+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr && j != tgx)
#else
if (i < cSim.atoms && x+j < cSim.atoms && j != tgx )
#endif
{
r = sqrt(r2);
if ((j != tgx) )
{
apos.w += getGBVI_Volume( r, ar.x, psA[j].sr );
}
apos.w += getGBVI_Volume( r, ar.x, psA[j].sr );
}
}
......@@ -127,27 +122,30 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
unsigned int offset = x + tgx + (x >> GRIDBITS) * cSim.stride;
cSim.pBornSum[offset] = apos.w;
#endif
}
else // 100% utilization
{
} else {
// Read fixed atom data into registers and GRF
unsigned int j = y + tgx;
unsigned int i = x + tgx;
float4 temp = cSim.pPosq[j];
float4 temp1 = cSim.pGBVIData[j];
float4 apos = cSim.pPosq[i]; // Local atom x, y, z, sum
float4 ar = cSim.pGBVIData[i]; // Local atom vr, sr
sA[threadIdx.x].x = temp.x;
sA[threadIdx.x].y = temp.y;
sA[threadIdx.x].z = temp.z;
sA[threadIdx.x].r = temp1.x;
sA[threadIdx.x].sr = temp1.y;
sA[threadIdx.x].sum = apos.w = 0.0f;
sA[threadIdx.x].sum = 0.0f;
apos.w = 0.0f;
#ifdef USE_CUTOFF
//unsigned int flags = cSim.pInteractionFlag[pos + (blockIdx.x*workUnits)/gridDim.x];
unsigned int flags = cSim.pInteractionFlag[pos];
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0)
{
// No interactions in this block.
......@@ -163,22 +161,19 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
dy = psA[tj].y - apos.y;
dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
#ifdef USE_CUTOFF
if (i < cSim.atoms && y+tj < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && y+tj < cSim.atoms )
#endif
{
r = sqrt(r2);
// psA[tj].sr = Sj
// ar.x = Ri
apos.w += getGBVI_Volume( r, ar.x, psA[tj].sr );
psA[tj].sum += getGBVI_Volume( r, psA[tj].r, ar.y );
}
......@@ -199,19 +194,20 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
dy = psA[j].y - apos.y;
dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
r2 = dx * dx + dy * dy + dz * dz;
#ifdef USE_PERIODIC
#ifdef USE_CUTOFF
if (i < cSim.atoms && y+j < cSim.atoms && r2 < cSim.nonbondedCutoffSqr)
#elif defined USE_CUTOFF
if (r2 < cSim.nonbondedCutoffSqr)
#else
if (i < cSim.atoms && y+j < cSim.atoms)
#endif
{
r = sqrt(r2);
tempBuffer[threadIdx.x] = getGBVI_Volume( r, psA[tj].r, ar.y );
apos.w += getGBVI_Volume( r, ar.x, psA[j].sr );
tempBuffer[threadIdx.x] = getGBVI_Volume( r, psA[j].r, ar.y );
}
// Sum the terms.
......
......@@ -36,6 +36,7 @@
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
using namespace std;
#include "gputypes.h"
......@@ -72,66 +73,6 @@ void GetCalculateGBVIForces2Sim(gpuContext gpu)
#include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__ void kCalculateGBVIForces2a_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
if( pos >= cSim.atoms )return;
float4 apos = cSim.pPosq[pos];
float4 ar = cSim.pGBVIData[pos];
float fb = cSim.pBornForce[pos];
unsigned int posJ = 0;
float4 force;
force.x = force.y = force.z = force.w = 0.0f;
while ( posJ < cSim.atoms )
{
float4 aposJ = cSim.pPosq[posJ];
float4 arJ = cSim.pGBVIData[posJ];
float fbJ = cSim.pBornForce[posJ];
float dx = aposJ.x - apos.x;
float dy = aposJ.y - apos.y;
float dz = aposJ.z - apos.z;
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float dE = getGBVI_dE2( r, ar.x, arJ.y, fb );
dE = r > 1.0e-08f ? dE : 0.0f;
//dx = dy = dz = 1.0f;
float d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#if 1
dE = getGBVI_dE2( r, arJ.x, ar.y, fbJ );
dE = r > 1.0e-08f ? dE : 0.0f;
d = dx*dE;
force.x -= d;
d = dy*dE;
force.y -= d;
d = dz*dE;
force.z -= d;
#endif
posJ += 1;
}
// Write results
cSim.pForce4[pos] = force;
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
......
......@@ -94,9 +94,9 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
......@@ -104,18 +104,15 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
// Atom I Born forces and sum
float dE = getGBVI_dE2( r, ar.x, psA[j].sr, fb );
#if defined USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || x+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || x+j >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
psA[j].fx += d;
......@@ -139,9 +136,9 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
of.y += af.y + sA[threadIdx.x].fy;
of.z += af.z + sA[threadIdx.x].fz;
cSim.pForce4[offset] = of;
}
else
{
} else {
// Read fixed atom data into registers and GRF
if (lasty != y)
{
......@@ -157,11 +154,12 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
sA[threadIdx.x].sr = temp1.y;
}
#ifdef USE_CUTOFF
unsigned int flags = cSim.pInteractionFlag[pos];
unsigned int flags = cSim.pInteractionFlag[pos];
if (flags == 0)
{
// No interactions in this block.
}
//else if (flags)
else if (flags == 0xFFFFFFFF)
#endif
{
......@@ -173,27 +171,23 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
float dy = psA[tj].y - apos.y;
float dz = psA[tj].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
float dE = getGBVI_dE2( r, ar.x, psA[tj].sr, fb );
#if defined USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+tj >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
......@@ -208,18 +202,15 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
// Atom J Born sum term
dE = getGBVI_dE2( r, psA[tj].r, ar.y, psA[tj].fb );
#ifdef USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+tj >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+tj >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
{
dE = 0.0f;
}
#endif
dx *= dE;
dy *= dE;
dz *= dE;
......@@ -245,9 +236,9 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
float dy = psA[j].y - apos.y;
float dz = psA[j].z - apos.z;
#ifdef USE_PERIODIC
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
dx -= floor(dx*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
dy -= floor(dy*cSim.invPeriodicBoxSizeY+0.5f)*cSim.periodicBoxSizeY;
dz -= floor(dz*cSim.invPeriodicBoxSizeZ+0.5f)*cSim.periodicBoxSizeZ;
#endif
float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2);
......@@ -255,18 +246,14 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
// Interleaved Atom I and J Born Forces and sum components
float dE = getGBVI_dE2( r, ar.x, psA[j].sr, fb );
#if defined USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+j >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
float d = dx * dE;
af.x -= d;
......@@ -280,19 +267,15 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
// Atom J Born sum term
dE = getGBVI_dE2( r, psA[j].r, ar.y, psA[j].fb );
#ifdef USE_PERIODIC
#if defined USE_CUTOFF
if (i >= cSim.atoms || y+j >= cSim.atoms || r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#else
if (i >= cSim.atoms || y+j >= cSim.atoms )
#endif
#if defined USE_CUTOFF
if (r2 > cSim.nonbondedCutoffSqr)
{
dE = 0.0f;
}
#endif
dx *= dE;
dy *= dE;
dz *= dE;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment