Minor optimizations to PME

3763b76b · Peter Eastman · 564fe013 · 3763b76b
Commit 3763b76b authored Sep 10, 2010 by Peter Eastman
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 9 deletions

platforms/cuda/src/kernels/kCalculatePME.cu +2 -9

No files found.
--- a/platforms/cuda/src/kernels/kCalculatePME.cu
+++ b/platforms/cuda/src/kernels/kCalculatePME.cu
@@ -225,13 +225,6 @@ void kFindAtomRangeForGrid_kernel()
 }
 __global__
-#if (__CUDA_ARCH__ >= 200)
-__launch_bounds__(1024, 1)
-#elif (__CUDA_ARCH__ >= 130)
-__launch_bounds__(512, 1)
-#else
-__launch_bounds__(256, 1)
-#endif
 void kGridSpreadCharge_kernel()
 {
    unsigned int numGridPoints = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z;
@@ -398,7 +391,7 @@ void kCalculatePME(gpuContext gpu)
    bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms);
    kFindAtomRangeForGrid_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
    LAUNCHERROR("kFindAtomRangeForGrid");
-    kGridSpreadCharge_kernel<<<8*gpu->sim.blocks, 64, 64*(sizeof(float)+sizeof(int4))>>>();
+    kGridSpreadCharge_kernel<<<16*gpu->sim.blocks, 64>>>();
    LAUNCHERROR("kGridSpreadCharge");
    cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
    kReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>();