Commit 3763b76b authored by Peter Eastman
Browse files

Minor optimizations to PME

parent 564fe013
...@@ -225,13 +225,6 @@ void kFindAtomRangeForGrid_kernel() ...@@ -225,13 +225,6 @@ void kFindAtomRangeForGrid_kernel()
} }
__global__ __global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kGridSpreadCharge_kernel() void kGridSpreadCharge_kernel()
{ {
unsigned int numGridPoints = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z; unsigned int numGridPoints = cSim.pmeGridSize.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z;
...@@ -398,7 +391,7 @@ void kCalculatePME(gpuContext gpu) ...@@ -398,7 +391,7 @@ void kCalculatePME(gpuContext gpu)
bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms); bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms);
kFindAtomRangeForGrid_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>(); kFindAtomRangeForGrid_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kFindAtomRangeForGrid"); LAUNCHERROR("kFindAtomRangeForGrid");
kGridSpreadCharge_kernel<<<8*gpu->sim.blocks, 64, 64*(sizeof(float)+sizeof(int4))>>>(); kGridSpreadCharge_kernel<<<16*gpu->sim.blocks, 64>>>();
LAUNCHERROR("kGridSpreadCharge"); LAUNCHERROR("kGridSpreadCharge");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD); cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
kReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>(); kReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment