Commit 117436ed authored by Peter Eastman
Browse files

Prevent custom forces from trying to use more shared memory than is available

parent 2d681361
......@@ -1871,6 +1871,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->sim.nonbond_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM;
gpu->sim.bornForce2_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM;
gpu->sim.blocks = deviceProp.multiProcessorCount;
gpu->sharedMemoryPerBlock = deviceProp.sharedMemPerBlock;
gpu->sim.shake_threads_per_block = gpu->sim.max_shake_threads_per_block;
gpu->sim.localForces_threads_per_block = gpu->sim.max_localForces_threads_per_block;
......
......@@ -72,6 +72,7 @@ struct _gpuContext {
gpuAtomType* gpAtomTable;
int gAtomTypes;
unsigned int blocksPerSM;
unsigned int sharedMemoryPerBlock;
cudaGmxSimulation sim;
unsigned int* pOutputBufferCounter;
std::vector<std::vector<int> > exclusions;
......
......@@ -156,7 +156,9 @@ void kCalculateCustomAngleForces_kernel()
/**
 * Launch the custom angle force kernel with a block size chosen so that the
 * dynamic shared memory request stays within gpu->sharedMemoryPerBlock.
 *
 * The kernel is launched exactly once; the block size is the smaller of the
 * configured local-forces block size and the largest multiple of 64 threads
 * whose combined per-thread storage fits in the shared memory budget.
 *
 * @param gpu  context providing sim.blocks, sim.localForces_threads_per_block,
 *             sim.customExpressionStackSize and sharedMemoryPerBlock
 */
void kCalculateCustomAngleForces(gpuContext gpu)
{
//    printf("kCalculateCustomAngleForces\n");
    // Dynamic shared memory requested per thread: (stack size + 9) floats.
    int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
    // 16 bytes of the budget are held back — presumably for shared memory the
    // launch itself consumes; TODO confirm.
    int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
    // Cap the block size at a multiple of 64 threads that fits the budget.
    // NOTE(review): if fewer than 64 threads fit, this evaluates to 0 and the
    // launch below is issued with an empty block.
    int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
    kCalculateCustomAngleForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
    LAUNCHERROR("kCalculateCustomAngleForces");
}
......@@ -135,7 +135,9 @@ void kCalculateCustomBondForces_kernel()
/**
 * Launch the custom bond force kernel with a block size chosen so that the
 * dynamic shared memory request stays within gpu->sharedMemoryPerBlock.
 *
 * The kernel is launched exactly once; the block size is the smaller of the
 * configured local-forces block size and the largest multiple of 64 threads
 * whose combined per-thread storage fits in the shared memory budget.
 *
 * @param gpu  context providing sim.blocks, sim.localForces_threads_per_block,
 *             sim.customExpressionStackSize and sharedMemoryPerBlock
 */
void kCalculateCustomBondForces(gpuContext gpu)
{
//    printf("kCalculateCustomBondForces\n");
    // Dynamic shared memory requested per thread: (stack size + 9) floats.
    int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
    // 16 bytes of the budget are held back — presumably for shared memory the
    // launch itself consumes; TODO confirm.
    int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
    // Cap the block size at a multiple of 64 threads that fits the budget.
    // NOTE(review): if fewer than 64 threads fit, this evaluates to 0 and the
    // launch below is issued with an empty block.
    int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
    kCalculateCustomBondForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
    LAUNCHERROR("kCalculateCustomBondForces");
}
......@@ -123,7 +123,9 @@ void kCalculateCustomExternalForces_kernel()
/**
 * Launch the custom external force kernel with a block size chosen so that the
 * dynamic shared memory request stays within gpu->sharedMemoryPerBlock.
 *
 * The kernel is launched exactly once; the block size is the smaller of the
 * configured local-forces block size and the largest multiple of 64 threads
 * whose combined per-thread storage fits in the shared memory budget.
 *
 * @param gpu  context providing sim.blocks, sim.localForces_threads_per_block,
 *             sim.customExpressionStackSize and sharedMemoryPerBlock
 */
void kCalculateCustomExternalForces(gpuContext gpu)
{
//    printf("kCalculateCustomExternalForces\n");
    // Dynamic shared memory requested per thread: (stack size + 9) floats.
    int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
    // 16 bytes of the budget are held back — presumably for shared memory the
    // launch itself consumes; TODO confirm.
    int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
    // Cap the block size at a multiple of 64 threads that fits the budget.
    // NOTE(review): if fewer than 64 threads fit, this evaluates to 0 and the
    // launch below is issued with an empty block.
    int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
    kCalculateCustomExternalForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
    LAUNCHERROR("kCalculateCustomExternalForces");
}
......@@ -150,7 +150,7 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
if (gpu->sim.customNonbondedMethod != NO_CUTOFF)
sharedPerThread += sizeof(float3);
int threads = gpu->sim.nonbond_threads_per_block;
int maxThreads = 16380/sharedPerThread;
int maxThreads = (gpu->sharedMemoryPerBlock-16)/sharedPerThread;
if (threads > maxThreads)
threads = (maxThreads/32)*32;
switch (gpu->sim.customNonbondedMethod)
......
......@@ -195,7 +195,9 @@ void kCalculateCustomTorsionForces_kernel()
/**
 * Launch the custom torsion force kernel with a block size chosen so that the
 * dynamic shared memory request stays within gpu->sharedMemoryPerBlock.
 *
 * The kernel is launched exactly once; the block size is the smaller of the
 * configured local-forces block size and the largest multiple of 64 threads
 * whose combined per-thread storage fits in the shared memory budget.
 *
 * @param gpu  context providing sim.blocks, sim.localForces_threads_per_block,
 *             sim.customExpressionStackSize and sharedMemoryPerBlock
 */
void kCalculateCustomTorsionForces(gpuContext gpu)
{
//    printf("kCalculateCustomTorsionForces\n");
    // Dynamic shared memory requested per thread: (stack size + 9) floats.
    int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
    // 16 bytes of the budget are held back — presumably for shared memory the
    // launch itself consumes; TODO confirm.
    int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
    // Cap the block size at a multiple of 64 threads that fits the budget.
    // NOTE(review): if fewer than 64 threads fit, this evaluates to 0 and the
    // launch below is issued with an empty block.
    int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
    kCalculateCustomTorsionForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
    LAUNCHERROR("kCalculateCustomTorsionForces");
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment