Commit 3f64d970 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

GBSA OBC thread block size now based in part on available shared memory

parent 09c1460d
......@@ -24,6 +24,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/OpenMMException.h"
#include "gputypes.h"
#include "freeEnergyGpuTypes.h"
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
......@@ -33,9 +37,6 @@
#include <fstream>
using namespace std;
#include "gputypes.h"
#include "freeEnergyGpuTypes.h"
struct Atom {
float x;
float y;
......@@ -99,38 +100,53 @@ void SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpuContext freeEnergyGpu )
void kCalculateObcGbsaSoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
{
//printf("kCalculateObcGbsaSoftcoreForces2\n");
unsigned int threadsPerBlock;
static unsigned int threadsPerBlockPerMethod[3] = { 0, 0, 0 };
static unsigned int natoms[3] = { 0, 0, 0 };
gpuContext gpu = freeEnergyGpu->gpuContext;
unsigned int methodIndex = static_cast<unsigned int>(freeEnergyGpu->freeEnergySim.nonbondedMethod);
if( methodIndex > 2 ){
throw OpenMM::OpenMMException( "kCalculateObcGbsaSoftcoreForces2 method index invalid." );
}
if( natoms[methodIndex] != gpu->natoms ){
unsigned int extra = methodIndex == 0 ? 0 : sizeof(float3);
threadsPerBlockPerMethod[methodIndex] = std::min(getThreadsPerBlockFEP( freeEnergyGpu, (sizeof(Atom) + extra), gpu->sharedMemoryPerBlock ), gpu->sim.nonbond_threads_per_block );
natoms[methodIndex] = gpu->natoms;
}
threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{
case FREE_ENERGY_NO_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
kCalculateObcGbsaSoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
else
kCalculateObcGbsaSoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit);
kCalculateObcGbsaSoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
break;
case FREE_ENERGY_CUTOFF:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateObcGbsaSoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateObcGbsaSoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateObcGbsaSoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
break;
case FREE_ENERGY_PERIODIC:
if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateObcGbsaSoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateObcGbsaSoftcorePeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
kCalculateObcGbsaSoftcorePeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
break;
}
LAUNCHERROR("kCalculateObcGbsaSoftcoreForces2");
......
......@@ -41,13 +41,18 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUnit)
{
extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
//float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[blockDim.x];
#endif
unsigned int lasty = -0xFFFFFFFF;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment