Commit 3f64d970 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

GBSA OBC thread block size now based in part on available shared memory

parent 09c1460d
...@@ -24,6 +24,10 @@ ...@@ -24,6 +24,10 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * * along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "openmm/OpenMMException.h"
#include "gputypes.h"
#include "freeEnergyGpuTypes.h"
#include <stdio.h> #include <stdio.h>
#include <cuda.h> #include <cuda.h>
#include <vector_functions.h> #include <vector_functions.h>
...@@ -33,9 +37,6 @@ ...@@ -33,9 +37,6 @@
#include <fstream> #include <fstream>
using namespace std; using namespace std;
#include "gputypes.h"
#include "freeEnergyGpuTypes.h"
struct Atom { struct Atom {
float x; float x;
float y; float y;
...@@ -99,38 +100,53 @@ void SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpuContext freeEnergyGpu ) ...@@ -99,38 +100,53 @@ void SetCalculateObcGbsaSoftcoreForces2Sim( freeEnergyGpuContext freeEnergyGpu )
void kCalculateObcGbsaSoftcoreForces2( freeEnergyGpuContext freeEnergyGpu ) void kCalculateObcGbsaSoftcoreForces2( freeEnergyGpuContext freeEnergyGpu )
{ {
//printf("kCalculateObcGbsaSoftcoreForces2\n"); unsigned int threadsPerBlock;
gpuContext gpu = freeEnergyGpu->gpuContext; static unsigned int threadsPerBlockPerMethod[3] = { 0, 0, 0 };
static unsigned int natoms[3] = { 0, 0, 0 };
gpuContext gpu = freeEnergyGpu->gpuContext;
unsigned int methodIndex = static_cast<unsigned int>(freeEnergyGpu->freeEnergySim.nonbondedMethod);
if( methodIndex > 2 ){
throw OpenMM::OpenMMException( "kCalculateObcGbsaSoftcoreForces2 method index invalid." );
}
if( natoms[methodIndex] != gpu->natoms ){
unsigned int extra = methodIndex == 0 ? 0 : sizeof(float3);
threadsPerBlockPerMethod[methodIndex] = std::min(getThreadsPerBlockFEP( freeEnergyGpu, (sizeof(Atom) + extra), gpu->sharedMemoryPerBlock ), gpu->sim.nonbond_threads_per_block );
natoms[methodIndex] = gpu->natoms;
}
threadsPerBlock = threadsPerBlockPerMethod[methodIndex];
switch (freeEnergyGpu->freeEnergySim.nonbondedMethod) switch (freeEnergyGpu->freeEnergySim.nonbondedMethod)
{ {
case FREE_ENERGY_NO_CUTOFF: case FREE_ENERGY_NO_CUTOFF:
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, kCalculateObcGbsaSoftcoreN2ByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
else else
kCalculateObcGbsaSoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, kCalculateObcGbsaSoftcoreN2Forces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
sizeof(Atom)*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pWorkUnit); sizeof(Atom)*threadsPerBlock>>>(gpu->sim.pWorkUnit);
break; break;
case FREE_ENERGY_CUTOFF: case FREE_ENERGY_CUTOFF:
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, kCalculateObcGbsaSoftcoreCutoffByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateObcGbsaSoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, kCalculateObcGbsaSoftcoreCutoffForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
break; break;
case FREE_ENERGY_PERIODIC: case FREE_ENERGY_PERIODIC:
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateObcGbsaSoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, kCalculateObcGbsaSoftcorePeriodicByWarpForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateObcGbsaSoftcorePeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, gpu->sim.bornForce2_threads_per_block, kCalculateObcGbsaSoftcorePeriodicForces2_kernel<<<gpu->sim.bornForce2_blocks, threadsPerBlock,
(sizeof(Atom)+sizeof(float3))*gpu->sim.bornForce2_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*threadsPerBlock>>>(gpu->sim.pInteractingWorkUnit);
break; break;
} }
LAUNCHERROR("kCalculateObcGbsaSoftcoreForces2"); LAUNCHERROR("kCalculateObcGbsaSoftcoreForces2");
......
...@@ -41,16 +41,21 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1) ...@@ -41,16 +41,21 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUnit) void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned int* workUnit)
{ {
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID; unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID; unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
//unsigned int totalWarps = cSim.bornForce2_blocks*cSim.bornForce2_threads_per_block/GRID;
//unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
unsigned int numWorkUnits = cSim.pInteractionCount[0]; unsigned int numWorkUnits = cSim.pInteractionCount[0];
unsigned int pos = warp*numWorkUnits/totalWarps; unsigned int pos = warp*numWorkUnits/totalWarps;
unsigned int end = (warp+1)*numWorkUnits/totalWarps; unsigned int end = (warp+1)*numWorkUnits/totalWarps;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block]; //float3* tempBuffer = (float3*) &sA[cSim.bornForce2_threads_per_block];
float3* tempBuffer = (float3*) &sA[blockDim.x];
#endif #endif
unsigned int lasty = -0xFFFFFFFF; unsigned int lasty = -0xFFFFFFFF;
while (pos < end) while (pos < end)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment