Commit bd4fb844 authored by Peter Eastman
Browse files

Fixed error in launch bounds

parent 7f367d00
...@@ -101,11 +101,11 @@ __device__ void computeBSplinePoint(float4* thetai, float w, float* array) ...@@ -101,11 +101,11 @@ __device__ void computeBSplinePoint(float4* thetai, float w, float* array)
*/ */
__global__ __global__
#if (__CUDA_ARCH__ >= 200) #if (__CUDA_ARCH__ >= 200)
__launch_bounds__(512, 1) __launch_bounds__(448, 1)
#elif (__CUDA_ARCH__ >= 120) #elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(256, 1) __launch_bounds__(160, 1)
#else #else
__launch_bounds__(128, 1) __launch_bounds__(160, 1)
#endif #endif
void kComputeAmoebaBsplines_kernel() void kComputeAmoebaBsplines_kernel()
{ {
...@@ -854,11 +854,11 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu) ...@@ -854,11 +854,11 @@ void kCalculateAmoebaPMEFixedMultipoleField(amoebaGpuContext amoebaGpu)
int threads; int threads;
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
if (gpu->sm_version >= SM_20) if (gpu->sm_version >= SM_20)
threads = 512; threads = 448;
else if (gpu->sm_version >= SM_12) else if (gpu->sm_version >= SM_12)
threads = 256; threads = 160;
else else
threads = 128; threads = 160;
kComputeAmoebaBsplines_kernel<<<gpu->sim.blocks, threads, threads*AMOEBA_PME_ORDER*AMOEBA_PME_ORDER*sizeof(float)>>>(); kComputeAmoebaBsplines_kernel<<<gpu->sim.blocks, threads, threads*AMOEBA_PME_ORDER*AMOEBA_PME_ORDER*sizeof(float)>>>();
LAUNCHERROR("kComputeAmoebaBsplines"); LAUNCHERROR("kComputeAmoebaBsplines");
bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms); bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment