Commit f5ea8297 authored by Peter Eastman
Browse files

Optimization (clear the force and Born sum buffers in a single kernel)

parent 2cb112f0
......@@ -47,6 +47,9 @@ void CudaCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context)
if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
gpuReorderAtoms(gpu);
data.computeForceCount++;
if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
kClearBornSumAndForces(gpu);
else
kClearForces(gpu);
}
......@@ -77,6 +80,8 @@ void CudaCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context)
gpuReorderAtoms(gpu);
data.stepCount++;
kClearEnergy(gpu);
if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
kClearBornSumAndForces(gpu);
}
double CudaCalcForcesAndEnergyKernel::finishEnergyComputation(ContextImpl& context) {
......@@ -790,7 +795,9 @@ void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& sys
gpuBuildExclusionList(gpu);
gpuBuildOutputBuffers(gpu);
gpuSetConstants(gpu);
kClearBornForces(gpu);
if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
kClearBornSumAndForces(gpu);
else
kClearForces(gpu);
cudaThreadSynchronize();
}
......
......@@ -29,7 +29,7 @@
// Initialization
extern void kClearForces(gpuContext gpu);
extern void kClearEnergy(gpuContext gpu);
extern void kClearBornForces(gpuContext gpu);
extern void kClearBornSumAndForces(gpuContext gpu);
extern void kClearObcGbsaBornSum(gpuContext gpu);
extern void kCalculateObcGbsaBornSum(gpuContext gpu);
extern void kReduceObcGbsaBornSum(gpuContext gpu);
......
......@@ -134,10 +134,6 @@ extern void kCalculatePME(gpuContext gpu);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
// check if Born radii need to be calculated
kClearBornForces(gpu);
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
......
......@@ -102,21 +102,6 @@ void GetCalculateGBVIBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVIBornSum.h"
// Zero-fills the float buffer addressed by cSim.pBornSum.
// The number of elements cleared is cSim.stride * cSim.nonbondOutputBuffers.
// Work is distributed with a grid-stride loop: each thread starts at its
// global thread index and advances by the total number of launched threads,
// so any 1D launch configuration covers the whole range.
__global__ void kClearGBVIBornSum_kernel()
{
    // Total number of threads in the launch; this is the per-thread stride.
    const unsigned int totalThreads = gridDim.x * blockDim.x;
    float* bornSum = (float*) cSim.pBornSum;

    for (unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
         idx < cSim.stride * cSim.nonbondOutputBuffers;
         idx += totalThreads)
    {
        bornSum[idx] = 0.0f;
    }
}
// Host-side launcher for kClearGBVIBornSum_kernel.
// Uses gpu->sim.blocks thread blocks of 384 threads each. The launch is not
// error-checked here; that is left to the caller.
void kClearGBVIBornSum(gpuContext gpu)
{
    const unsigned int threadsPerBlock = 384;
    kClearGBVIBornSum_kernel<<<gpu->sim.blocks, threadsPerBlock>>>();
}
__global__ void kReduceGBVIBornSum_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
......@@ -177,8 +162,6 @@ void kReduceGBVIBornSum(gpuContext gpu)
void kCalculateGBVIBornSum(gpuContext gpu)
{
//printf("kCalculateGBVIBornSum\n");
kClearGBVIBornSum( gpu );
LAUNCHERROR("kClearBornSum");
//size_t numWithInteractions;
switch (gpu->sim.nonbondedMethod)
{
......
......@@ -97,19 +97,6 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaBornSum.h"
// Zero-fills the float buffer addressed by cSim.pBornSum.
// The number of elements cleared is cSim.stride * cSim.nonbondOutputBuffers.
// __launch_bounds__(384, 1): the kernel is compiled for at most 384 threads
// per block and at least one resident block per multiprocessor.
// Work is distributed with a grid-stride loop, so every element is visited
// regardless of how many blocks are launched.
__global__
__launch_bounds__(384, 1)
void kClearObcGbsaBornSum_kernel()
{
    // Total number of threads in the launch; this is the per-thread stride.
    const unsigned int totalThreads = gridDim.x * blockDim.x;
    float* bornSum = (float*) cSim.pBornSum;

    for (unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
         idx < cSim.stride * cSim.nonbondOutputBuffers;
         idx += totalThreads)
    {
        bornSum[idx] = 0.0f;
    }
}
__global__
__launch_bounds__(384, 1)
void kReduceObcGbsaBornSum_kernel()
......@@ -154,17 +141,9 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
LAUNCHERROR("kReduceObcGbsaBornSum");
}
// Host-side launcher for kClearObcGbsaBornSum_kernel.
// Uses gpu->sim.blocks thread blocks of 384 threads each, the maximum block
// size that kernel declares in its __launch_bounds__. The launch is not
// error-checked here; that is left to the caller.
extern void kClearObcGbsaBornSum(gpuContext gpu)
{
    // Disabled debug trace:
    // printf("kClearObcGbsaBornSum\n");
    const unsigned int threadsPerBlock = 384;
    kClearObcGbsaBornSum_kernel<<<gpu->sim.blocks, threadsPerBlock>>>();
}
void kCalculateObcGbsaBornSum(gpuContext gpu)
{
// printf("kCalculateObcgbsaBornSum\n");
kClearObcGbsaBornSum(gpu);
LAUNCHERROR("kClearBornSum");
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
......
......@@ -74,21 +74,28 @@ void kClearForces(gpuContext gpu)
__global__
__launch_bounds__(384, 1)
void kClearBornForces_kernel()
void kClearBornSumAndForces_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers)
{
((float*)cSim.pBornForce)[pos] = 0.0f;
cSim.pBornSum[pos] = 0.0f;
cSim.pBornForce[pos] = 0.0f;
cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
pos += gridDim.x * blockDim.x;
}
while (pos < cSim.stride * cSim.outputBuffers)
{
cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
pos += gridDim.x * blockDim.x;
}
}
void kClearBornForces(gpuContext gpu)
void kClearBornSumAndForces(gpuContext gpu)
{
// printf("kClearBornForces\n");
kClearBornForces_kernel<<<gpu->sim.blocks, 384>>>();
LAUNCHERROR("kClearBornForces");
// printf("kClearBornSumAndForces\n");
kClearBornSumAndForces_kernel<<<gpu->sim.blocks, 384>>>();
LAUNCHERROR("kClearBornSumAndForces");
}
__global__
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment