Commit 117436ed authored by Peter Eastman
Browse files

Prevent custom forces from trying to use more shared memory than is available

parent 2d681361
...@@ -1871,6 +1871,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync) ...@@ -1871,6 +1871,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->sim.nonbond_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM; gpu->sim.nonbond_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM;
gpu->sim.bornForce2_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM; gpu->sim.bornForce2_blocks = deviceProp.multiProcessorCount*gpu->blocksPerSM;
gpu->sim.blocks = deviceProp.multiProcessorCount; gpu->sim.blocks = deviceProp.multiProcessorCount;
gpu->sharedMemoryPerBlock = deviceProp.sharedMemPerBlock;
gpu->sim.shake_threads_per_block = gpu->sim.max_shake_threads_per_block; gpu->sim.shake_threads_per_block = gpu->sim.max_shake_threads_per_block;
gpu->sim.localForces_threads_per_block = gpu->sim.max_localForces_threads_per_block; gpu->sim.localForces_threads_per_block = gpu->sim.max_localForces_threads_per_block;
......
...@@ -72,6 +72,7 @@ struct _gpuContext { ...@@ -72,6 +72,7 @@ struct _gpuContext {
gpuAtomType* gpAtomTable; gpuAtomType* gpAtomTable;
int gAtomTypes; int gAtomTypes;
unsigned int blocksPerSM; unsigned int blocksPerSM;
unsigned int sharedMemoryPerBlock;
cudaGmxSimulation sim; cudaGmxSimulation sim;
unsigned int* pOutputBufferCounter; unsigned int* pOutputBufferCounter;
std::vector<std::vector<int> > exclusions; std::vector<std::vector<int> > exclusions;
......
...@@ -156,7 +156,9 @@ void kCalculateCustomAngleForces_kernel() ...@@ -156,7 +156,9 @@ void kCalculateCustomAngleForces_kernel()
// Host-side launcher for kCalculateCustomAngleForces_kernel.
//
// This text is a side-by-side diff rendering: each line below shows the
// pre-commit code followed by the post-commit code.
//
// Pre-commit: launched gpu->sim.blocks blocks of
// gpu->sim.localForces_threads_per_block threads, requesting
// (customExpressionStackSize+9)*sizeof(float) bytes of dynamic shared memory
// per thread with no upper bound.
//
// Post-commit: computes the same per-thread byte count (memoryPerThread),
// derives maxThreads from gpu->sharedMemoryPerBlock, rounds that down to a
// multiple of 64, and launches with the smaller of that value and
// localForces_threads_per_block, so the requested dynamic shared memory is
// memoryPerThread*threads.
//
// NOTE(review): the 16 bytes subtracted from sharedMemoryPerBlock are
// presumably a reserve for launch overhead -- confirm.
// NOTE(review): if maxThreads < 64 then (maxThreads/64)*64 is 0 and the launch
// would request 0 threads per block -- confirm this cannot happen or handle it.
void kCalculateCustomAngleForces(gpuContext gpu) void kCalculateCustomAngleForces(gpuContext gpu)
{ {
// printf("kCalculateCustomAngleForces\n"); // printf("kCalculateCustomAngleForces\n");
kCalculateCustomAngleForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block, int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
(gpu->sim.customExpressionStackSize+9)*sizeof(float)*gpu->sim.localForces_threads_per_block>>>(); int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
kCalculateCustomAngleForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
LAUNCHERROR("kCalculateCustomAngleForces"); LAUNCHERROR("kCalculateCustomAngleForces");
} }
...@@ -135,7 +135,9 @@ void kCalculateCustomBondForces_kernel() ...@@ -135,7 +135,9 @@ void kCalculateCustomBondForces_kernel()
// Host-side launcher for kCalculateCustomBondForces_kernel.
//
// This text is a side-by-side diff rendering: each line below shows the
// pre-commit code followed by the post-commit code.
//
// Pre-commit: launched gpu->sim.blocks blocks of
// gpu->sim.localForces_threads_per_block threads, requesting
// (customExpressionStackSize+9)*sizeof(float) bytes of dynamic shared memory
// per thread with no upper bound.
//
// Post-commit: computes the same per-thread byte count (memoryPerThread),
// derives maxThreads from gpu->sharedMemoryPerBlock, rounds that down to a
// multiple of 64, and launches with the smaller of that value and
// localForces_threads_per_block, so the requested dynamic shared memory is
// memoryPerThread*threads.
//
// NOTE(review): the 16 bytes subtracted from sharedMemoryPerBlock are
// presumably a reserve for launch overhead -- confirm.
// NOTE(review): if maxThreads < 64 then (maxThreads/64)*64 is 0 and the launch
// would request 0 threads per block -- confirm this cannot happen or handle it.
void kCalculateCustomBondForces(gpuContext gpu) void kCalculateCustomBondForces(gpuContext gpu)
{ {
// printf("kCalculateCustomBondForces\n"); // printf("kCalculateCustomBondForces\n");
kCalculateCustomBondForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block, int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
(gpu->sim.customExpressionStackSize+9)*sizeof(float)*gpu->sim.localForces_threads_per_block>>>(); int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
kCalculateCustomBondForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
LAUNCHERROR("kCalculateCustomBondForces"); LAUNCHERROR("kCalculateCustomBondForces");
} }
...@@ -123,7 +123,9 @@ void kCalculateCustomExternalForces_kernel() ...@@ -123,7 +123,9 @@ void kCalculateCustomExternalForces_kernel()
// Host-side launcher for kCalculateCustomExternalForces_kernel.
//
// This text is a side-by-side diff rendering: each line below shows the
// pre-commit code followed by the post-commit code.
//
// Pre-commit: launched gpu->sim.blocks blocks of
// gpu->sim.localForces_threads_per_block threads, requesting
// (customExpressionStackSize+9)*sizeof(float) bytes of dynamic shared memory
// per thread with no upper bound.
//
// Post-commit: computes the same per-thread byte count (memoryPerThread),
// derives maxThreads from gpu->sharedMemoryPerBlock, rounds that down to a
// multiple of 64, and launches with the smaller of that value and
// localForces_threads_per_block, so the requested dynamic shared memory is
// memoryPerThread*threads.
//
// NOTE(review): the 16 bytes subtracted from sharedMemoryPerBlock are
// presumably a reserve for launch overhead -- confirm.
// NOTE(review): if maxThreads < 64 then (maxThreads/64)*64 is 0 and the launch
// would request 0 threads per block -- confirm this cannot happen or handle it.
void kCalculateCustomExternalForces(gpuContext gpu) void kCalculateCustomExternalForces(gpuContext gpu)
{ {
// printf("kCalculateCustomExternalForces\n"); // printf("kCalculateCustomExternalForces\n");
kCalculateCustomExternalForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block, int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
(gpu->sim.customExpressionStackSize+9)*sizeof(float)*gpu->sim.localForces_threads_per_block>>>(); int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
kCalculateCustomExternalForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
LAUNCHERROR("kCalculateCustomExternalForces"); LAUNCHERROR("kCalculateCustomExternalForces");
} }
...@@ -150,7 +150,7 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid) ...@@ -150,7 +150,7 @@ void kCalculateCustomNonbondedForces(gpuContext gpu, bool neighborListValid)
if (gpu->sim.customNonbondedMethod != NO_CUTOFF) if (gpu->sim.customNonbondedMethod != NO_CUTOFF)
sharedPerThread += sizeof(float3); sharedPerThread += sizeof(float3);
int threads = gpu->sim.nonbond_threads_per_block; int threads = gpu->sim.nonbond_threads_per_block;
int maxThreads = 16380/sharedPerThread; int maxThreads = (gpu->sharedMemoryPerBlock-16)/sharedPerThread;
if (threads > maxThreads) if (threads > maxThreads)
threads = (maxThreads/32)*32; threads = (maxThreads/32)*32;
switch (gpu->sim.customNonbondedMethod) switch (gpu->sim.customNonbondedMethod)
......
...@@ -195,7 +195,9 @@ void kCalculateCustomTorsionForces_kernel() ...@@ -195,7 +195,9 @@ void kCalculateCustomTorsionForces_kernel()
// Host-side launcher for kCalculateCustomTorsionForces_kernel.
//
// This text is a side-by-side diff rendering: each line below shows the
// pre-commit code followed by the post-commit code.
//
// Pre-commit: launched gpu->sim.blocks blocks of
// gpu->sim.localForces_threads_per_block threads, requesting
// (customExpressionStackSize+9)*sizeof(float) bytes of dynamic shared memory
// per thread with no upper bound.
//
// Post-commit: computes the same per-thread byte count (memoryPerThread),
// derives maxThreads from gpu->sharedMemoryPerBlock, rounds that down to a
// multiple of 64, and launches with the smaller of that value and
// localForces_threads_per_block, so the requested dynamic shared memory is
// memoryPerThread*threads.
//
// NOTE(review): the 16 bytes subtracted from sharedMemoryPerBlock are
// presumably a reserve for launch overhead -- confirm.
// NOTE(review): if maxThreads < 64 then (maxThreads/64)*64 is 0 and the launch
// would request 0 threads per block -- confirm this cannot happen or handle it.
void kCalculateCustomTorsionForces(gpuContext gpu) void kCalculateCustomTorsionForces(gpuContext gpu)
{ {
// printf("kCalculateCustomTorsionForces\n"); // printf("kCalculateCustomTorsionForces\n");
kCalculateCustomTorsionForces_kernel<<<gpu->sim.blocks, gpu->sim.localForces_threads_per_block, int memoryPerThread = (gpu->sim.customExpressionStackSize+9)*sizeof(float);
(gpu->sim.customExpressionStackSize+9)*sizeof(float)*gpu->sim.localForces_threads_per_block>>>(); int maxThreads = (gpu->sharedMemoryPerBlock-16)/memoryPerThread;
int threads = min(gpu->sim.localForces_threads_per_block, (maxThreads/64)*64);
kCalculateCustomTorsionForces_kernel<<<gpu->sim.blocks, threads, memoryPerThread*threads>>>();
LAUNCHERROR("kCalculateCustomTorsionForces"); LAUNCHERROR("kCalculateCustomTorsionForces");
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment