Commit c4008bb2 authored by Peter Eastman
Browse files

Increased block sizes on Fermi

parent 2bce659f
...@@ -578,7 +578,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon ...@@ -578,7 +578,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
if( threadsPerBlock == 0 ){ if( threadsPerBlock == 0 ){
unsigned int maxThreads; unsigned int maxThreads;
if (gpu->sm_version >= SM_20) if (gpu->sm_version >= SM_20)
maxThreads = 256; maxThreads = 384;
else if (gpu->sm_version >= SM_12) else if (gpu->sm_version >= SM_12)
maxThreads = 128; maxThreads = 128;
else else
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(256, 1) __launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(128, 1) __launch_bounds__(128, 1)
#else #else
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment