"platforms/cuda/tests/TestCudaCheckpoints.cpp" did not exist on "17ae3aaeaa7c0518f3ec4a546f0f718f07dd06b3"
Commit f5ea8297 authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimization (clear the force and Born sum buffers in a single kernel)

parent 2cb112f0
......@@ -47,7 +47,10 @@ void CudaCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context)
if (data.nonbondedMethod != NO_CUTOFF && data.computeForceCount%100 == 0)
gpuReorderAtoms(gpu);
data.computeForceCount++;
kClearForces(gpu);
if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
kClearBornSumAndForces(gpu);
else
kClearForces(gpu);
}
void CudaCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& context) {
......@@ -77,6 +80,8 @@ void CudaCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context)
gpuReorderAtoms(gpu);
data.stepCount++;
kClearEnergy(gpu);
if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
kClearBornSumAndForces(gpu);
}
double CudaCalcForcesAndEnergyKernel::finishEnergyComputation(ContextImpl& context) {
......@@ -790,8 +795,10 @@ void OPENMMCUDA_EXPORT OpenMM::cudaOpenMMInitializeIntegration(const System& sys
gpuBuildExclusionList(gpu);
gpuBuildOutputBuffers(gpu);
gpuSetConstants(gpu);
kClearBornForces(gpu);
kClearForces(gpu);
if (gpu->bIncludeGBSA || gpu->bIncludeGBVI)
kClearBornSumAndForces(gpu);
else
kClearForces(gpu);
cudaThreadSynchronize();
}
......
......@@ -29,7 +29,7 @@
// Initialization
extern void kClearForces(gpuContext gpu);
extern void kClearEnergy(gpuContext gpu);
extern void kClearBornForces(gpuContext gpu);
extern void kClearBornSumAndForces(gpuContext gpu);
extern void kClearObcGbsaBornSum(gpuContext gpu);
extern void kCalculateObcGbsaBornSum(gpuContext gpu);
extern void kReduceObcGbsaBornSum(gpuContext gpu);
......
......@@ -134,10 +134,6 @@ extern void kCalculatePME(gpuContext gpu);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
// check if Born radii need to be calculated
kClearBornForces(gpu);
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
......
......@@ -102,21 +102,6 @@ void GetCalculateGBVIBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVIBornSum.h"
__global__ void kClearGBVIBornSum_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers)
{
((float*)cSim.pBornSum)[pos] = 0.0f;
pos += gridDim.x * blockDim.x;
}
}
void kClearGBVIBornSum(gpuContext gpu) {
kClearGBVIBornSum_kernel<<<gpu->sim.blocks, 384>>>();
}
__global__ void kReduceGBVIBornSum_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
......@@ -177,8 +162,6 @@ void kReduceGBVIBornSum(gpuContext gpu)
void kCalculateGBVIBornSum(gpuContext gpu)
{
//printf("kCalculateGBVIBornSum\n");
kClearGBVIBornSum( gpu );
LAUNCHERROR("kClearBornSum");
//size_t numWithInteractions;
switch (gpu->sim.nonbondedMethod)
{
......
......@@ -97,19 +97,6 @@ void GetCalculateObcGbsaBornSumSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateObcGbsaBornSum.h"
__global__
__launch_bounds__(384, 1)
void kClearObcGbsaBornSum_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers)
{
((float*)cSim.pBornSum)[pos] = 0.0f;
pos += gridDim.x * blockDim.x;
}
}
__global__
__launch_bounds__(384, 1)
void kReduceObcGbsaBornSum_kernel()
......@@ -154,17 +141,9 @@ void kReduceObcGbsaBornSum(gpuContext gpu)
LAUNCHERROR("kReduceObcGbsaBornSum");
}
extern void kClearObcGbsaBornSum(gpuContext gpu)
{
// printf("kClearObcGbsaBornSum\n");
kClearObcGbsaBornSum_kernel<<<gpu->sim.blocks, 384>>>();
}
void kCalculateObcGbsaBornSum(gpuContext gpu)
{
// printf("kCalculateObcgbsaBornSum\n");
kClearObcGbsaBornSum(gpu);
LAUNCHERROR("kClearBornSum");
switch (gpu->sim.nonbondedMethod)
{
case NO_CUTOFF:
......
......@@ -53,8 +53,8 @@ void GetForcesSim(gpuContext gpu)
RTERROR(status, "cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed");
}
__global__
__launch_bounds__(384, 1)
__global__
__launch_bounds__(384, 1)
void kClearForces_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -72,27 +72,34 @@ void kClearForces(gpuContext gpu)
LAUNCHERROR("kClearForces");
}
__global__
__launch_bounds__(384, 1)
void kClearBornForces_kernel()
__global__
__launch_bounds__(384, 1)
void kClearBornSumAndForces_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
while (pos < cSim.stride * cSim.nonbondOutputBuffers)
{
((float*)cSim.pBornForce)[pos] = 0.0f;
cSim.pBornSum[pos] = 0.0f;
cSim.pBornForce[pos] = 0.0f;
cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
pos += gridDim.x * blockDim.x;
}
while (pos < cSim.stride * cSim.outputBuffers)
{
cSim.pForce4[pos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
pos += gridDim.x * blockDim.x;
}
}
void kClearBornForces(gpuContext gpu)
void kClearBornSumAndForces(gpuContext gpu)
{
// printf("kClearBornForces\n");
kClearBornForces_kernel<<<gpu->sim.blocks, 384>>>();
LAUNCHERROR("kClearBornForces");
// printf("kClearBornSumAndForces\n");
kClearBornSumAndForces_kernel<<<gpu->sim.blocks, 384>>>();
LAUNCHERROR("kClearBornSumAndForces");
}
__global__
__launch_bounds__(384, 1)
__global__
__launch_bounds__(384, 1)
void kClearEnergy_kernel()
{
unsigned int pos = blockIdx.x * blockDim.x + threadIdx.x;
......@@ -110,15 +117,15 @@ void kClearEnergy(gpuContext gpu)
LAUNCHERROR("kClearEnergy");
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceBornSumAndForces_kernel()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceBornSumAndForces_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
......@@ -221,14 +228,14 @@ void kReduceBornSumAndForces(gpuContext gpu)
LAUNCHERROR("kReduceBornSumAndForces");
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceForces_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
......@@ -291,15 +298,15 @@ double kReduceEnergy(gpuContext gpu)
return sum;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif
void kReduceObcGbsaBornForces_kernel()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif
void kReduceObcGbsaBornForces_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
float energy = 0.0f;
......@@ -357,15 +364,15 @@ void kReduceObcGbsaBornForces_kernel()
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += energy / -6.0f;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif
void kReduceGBVIBornForces_kernel()
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_UPDATE_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_UPDATE_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_UPDATE_THREADS_PER_BLOCK, 1)
#endif
void kReduceGBVIBornForces_kernel()
{
unsigned int pos = (blockIdx.x * blockDim.x + threadIdx.x);
float energy = 0.0f;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment