"platforms/opencl/vscode:/vscode.git/clone" did not exist on "8ac164930170af8ff7460e448abb721b3ba526e7"
Commit 0aca702a authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed errors running on compute 1.1 devices

parent c9b1338b
...@@ -1190,7 +1190,14 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu ) ...@@ -1190,7 +1190,14 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
kClearFields_3( amoebaGpu, 6 ); kClearFields_3( amoebaGpu, 6 );
if( threadsPerBlock == 0 ){ if( threadsPerBlock == 0 ){
threadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle)); unsigned int maxThreads;
if (gpu->sm_version >= SM_20)
maxThreads = 192;
else if (gpu->sm_version >= SM_12)
maxThreads = 96;
else
maxThreads = 32;
threadsPerBlock = std::min(getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle)), maxThreads);
} }
if( amoebaGpu->log && timestep == 1 ){ if( amoebaGpu->log && timestep == 1 ){
......
...@@ -574,7 +574,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon ...@@ -574,7 +574,14 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldMatrixMultiply( amoebaGpuCon
// set threads/block first time through // set threads/block first time through
if( threadsPerBlock == 0 ){ if( threadsPerBlock == 0 ){
threadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(MutualInducedParticle)); unsigned int maxThreads;
if (gpu->sm_version >= SM_20)
maxThreads = 256;
else if (gpu->sm_version >= SM_12)
maxThreads = 128;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock( amoebaGpu, sizeof(MutualInducedParticle)), maxThreads);
} }
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
......
...@@ -176,7 +176,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4 ...@@ -176,7 +176,7 @@ __device__ void calculateVdw14_7PairIxn_kernel( float4 atomCoordinatesI, float4
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...@@ -270,7 +270,14 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF ...@@ -270,7 +270,14 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF
static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStream<float>* vdwOutputArray, CUDAStream<float4>* forceOutputArray ) static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStream<float>* vdwOutputArray, CUDAStream<float4>* forceOutputArray )
{ {
kCalculateAmoebaVdw14_7Reduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, 384>>>( unsigned int threadsPerBlock;
if (amoebaGpu->gpuContext->sm_version >= SM_20)
threadsPerBlock = GF1XX_NONBOND_THREADS_PER_BLOCK;
else if (amoebaGpu->gpuContext->sm_version >= SM_12)
threadsPerBlock = GT2XX_NONBOND_THREADS_PER_BLOCK;
else
threadsPerBlock = G8X_NONBOND_THREADS_PER_BLOCK;
kCalculateAmoebaVdw14_7Reduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, threadsPerBlock>>>(
vdwOutputArray->_pDevStream[0], forceOutputArray->_pDevStream[0] ); vdwOutputArray->_pDevStream[0], forceOutputArray->_pDevStream[0] );
LAUNCHERROR("kCalculateAmoebaVdw14_7Reduction"); LAUNCHERROR("kCalculateAmoebaVdw14_7Reduction");
} }
...@@ -282,7 +289,7 @@ static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStr ...@@ -282,7 +289,7 @@ static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStr
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
...@@ -361,9 +368,16 @@ static void kCalculateAmoebaVdw14_7CoordinateReduction(amoebaGpuContext amoebaGp ...@@ -361,9 +368,16 @@ static void kCalculateAmoebaVdw14_7CoordinateReduction(amoebaGpuContext amoebaGp
CUDAStream<float4>* coordinateArray, CUDAStream<float4>* coordinateArray,
CUDAStream<float4>* reducedCoordinateArray) CUDAStream<float4>* reducedCoordinateArray)
{ {
kCalculateAmoebaVdw14_7CoordinateReduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, 384>>>( unsigned int threadsPerBlock;
if (amoebaGpu->gpuContext->sm_version >= SM_20)
threadsPerBlock = GF1XX_THREADS_PER_BLOCK;
else if (amoebaGpu->gpuContext->sm_version >= SM_12)
threadsPerBlock = GT2XX_THREADS_PER_BLOCK;
else
threadsPerBlock = G8X_THREADS_PER_BLOCK;
kCalculateAmoebaVdw14_7CoordinateReduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, threadsPerBlock>>>(
coordinateArray->_pDevStream[0], reducedCoordinateArray->_pDevStream[0] ); coordinateArray->_pDevStream[0], reducedCoordinateArray->_pDevStream[0] );
LAUNCHERROR("kCalculateAmoebaVdw14_7Reduction"); LAUNCHERROR("kCalculateAmoebaVdw14_7CoordinateReduction");
} }
// perform reduction of force on H's and add to heavy atom partner // perform reduction of force on H's and add to heavy atom partner
......
...@@ -408,8 +408,14 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu ) ...@@ -408,8 +408,14 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
// set threads/block first time through // set threads/block first time through
if( threadsPerBlock == 0 ){ if( threadsPerBlock == 0 ){
threadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(WcaDispersionParticle)); unsigned int maxThreads;
threadsPerBlock = 128; if (gpu->sm_version >= SM_20)
maxThreads = 384;
else if (gpu->sm_version >= SM_12)
maxThreads = 192;
else
maxThreads = 64;
threadsPerBlock = std::min(getThreadsPerBlock( amoebaGpu, sizeof(WcaDispersionParticle)), maxThreads);
} }
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(384, 1) __launch_bounds__(384, 1)
#elif (__CUDA_ARCH__ >= 130) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(192, 1) __launch_bounds__(192, 1)
#else #else
__launch_bounds__(64, 1) __launch_bounds__(64, 1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment