"vscode:/vscode.git/clone" did not exist on "27777280f602cd887c19e890d91ddc72cd8661a0"
Commit d5cc0fba authored by Peter Eastman's avatar Peter Eastman
Browse files

Reduced memory use for PME.

parent dc677fac
...@@ -367,8 +367,6 @@ struct cudaGmxSimulation { ...@@ -367,8 +367,6 @@ struct cudaGmxSimulation {
float* pPmeBsplineModuli[3]; float* pPmeBsplineModuli[3];
float4* pPmeBsplineTheta; float4* pPmeBsplineTheta;
float4* pPmeBsplineDtheta; float4* pPmeBsplineDtheta;
int4* pPmeParticleIndex; // The grid indices for each atom
float4* pPmeParticleFraction; // Fractional offset in the grid for each atom in all three dimensions.
int* pPmeAtomRange; // The range of sorted atoms at each grid point int* pPmeAtomRange; // The range of sorted atoms at each grid point
float2* pPmeAtomGridIndex; // The grid point each atom is at float2* pPmeAtomGridIndex; // The grid point each atom is at
unsigned int bonds; // Number of bonds unsigned int bonds; // Number of bonds
......
...@@ -795,10 +795,6 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSiz ...@@ -795,10 +795,6 @@ void gpuSetPMEParameters(gpuContext gpu, float alpha, int gridSizeX, int gridSiz
gpu->sim.pPmeBsplineTheta = gpu->psPmeBsplineTheta->_pDevData; gpu->sim.pPmeBsplineTheta = gpu->psPmeBsplineTheta->_pDevData;
gpu->psPmeBsplineDtheta = new CUDAStream<float4>(PME_ORDER*gpu->natoms, 1, "PmeBsplineDtheta"); gpu->psPmeBsplineDtheta = new CUDAStream<float4>(PME_ORDER*gpu->natoms, 1, "PmeBsplineDtheta");
gpu->sim.pPmeBsplineDtheta = gpu->psPmeBsplineDtheta->_pDevData; gpu->sim.pPmeBsplineDtheta = gpu->psPmeBsplineDtheta->_pDevData;
gpu->psPmeParticleIndex = new CUDAStream<int4>(gpu->natoms, 1, "PmeParticleIndex");
gpu->sim.pPmeParticleIndex = gpu->psPmeParticleIndex->_pDevData;
gpu->psPmeParticleFraction = new CUDAStream<float4>(gpu->natoms, 1, "PmeParticleFraction");
gpu->sim.pPmeParticleFraction = gpu->psPmeParticleFraction->_pDevData;
gpu->psPmeAtomRange = new CUDAStream<int>(gridSize.x*gridSize.y*gridSize.z+1, 1, "PmeAtomRange"); gpu->psPmeAtomRange = new CUDAStream<int>(gridSize.x*gridSize.y*gridSize.z+1, 1, "PmeAtomRange");
gpu->sim.pPmeAtomRange = gpu->psPmeAtomRange->_pDevData; gpu->sim.pPmeAtomRange = gpu->psPmeAtomRange->_pDevData;
gpu->psPmeAtomGridIndex = new CUDAStream<float2>(gpu->natoms, 1, "PmeAtomGridIndex"); gpu->psPmeAtomGridIndex = new CUDAStream<float2>(gpu->natoms, 1, "PmeAtomGridIndex");
...@@ -1703,8 +1699,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync) ...@@ -1703,8 +1699,6 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu->psPmeBsplineModuli[2] = NULL; gpu->psPmeBsplineModuli[2] = NULL;
gpu->psPmeBsplineTheta = NULL; gpu->psPmeBsplineTheta = NULL;
gpu->psPmeBsplineDtheta = NULL; gpu->psPmeBsplineDtheta = NULL;
gpu->psPmeParticleIndex = NULL;
gpu->psPmeParticleFraction = NULL;
gpu->psPmeAtomRange = NULL; gpu->psPmeAtomRange = NULL;
gpu->psPmeAtomGridIndex = NULL; gpu->psPmeAtomGridIndex = NULL;
gpu->psShakeID = NULL; gpu->psShakeID = NULL;
...@@ -1872,8 +1866,6 @@ void gpuShutDown(gpuContext gpu) ...@@ -1872,8 +1866,6 @@ void gpuShutDown(gpuContext gpu)
delete gpu->psPmeBsplineModuli[2]; delete gpu->psPmeBsplineModuli[2];
delete gpu->psPmeBsplineTheta; delete gpu->psPmeBsplineTheta;
delete gpu->psPmeBsplineDtheta; delete gpu->psPmeBsplineDtheta;
delete gpu->psPmeParticleIndex;
delete gpu->psPmeParticleFraction;
delete gpu->psPmeAtomRange; delete gpu->psPmeAtomRange;
delete gpu->psPmeAtomGridIndex; delete gpu->psPmeAtomGridIndex;
delete gpu->psTabulatedErfc; delete gpu->psTabulatedErfc;
......
...@@ -111,8 +111,6 @@ struct _gpuContext { ...@@ -111,8 +111,6 @@ struct _gpuContext {
CUDAStream<float>* psPmeBsplineModuli[3]; CUDAStream<float>* psPmeBsplineModuli[3];
CUDAStream<float4>* psPmeBsplineTheta; CUDAStream<float4>* psPmeBsplineTheta;
CUDAStream<float4>* psPmeBsplineDtheta; CUDAStream<float4>* psPmeBsplineDtheta;
CUDAStream<int4>* psPmeParticleIndex; // The grid indices for each atom
CUDAStream<float4>* psPmeParticleFraction; // Fractional offset in the grid for each atom in all three dimensions.
CUDAStream<int>* psPmeAtomRange; // The range of sorted atoms at each grid point CUDAStream<int>* psPmeAtomRange; // The range of sorted atoms at each grid point
CUDAStream<float2>* psPmeAtomGridIndex; // The grid point each atom is at CUDAStream<float2>* psPmeAtomGridIndex; // The grid point each atom is at
CUDAStream<float2>* psObcData; CUDAStream<float2>* psObcData;
......
...@@ -99,20 +99,14 @@ __global__ void kUpdateGridIndexAndFraction_kernel() ...@@ -99,20 +99,14 @@ __global__ void kUpdateGridIndexAndFraction_kernel()
for (int i = tid; i < cSim.atoms; i += tnb) for (int i = tid; i < cSim.atoms; i += tnb)
{ {
float4 ftmp = cSim.pPosq[i]; float4 posq = cSim.pPosq[i];
float3 t = make_float3((ftmp.x/cSim.periodicBoxSizeX+1.0f)*cSim.pmeGridSize.x, float3 t = make_float3((posq.x/cSim.periodicBoxSizeX+1.0f)*cSim.pmeGridSize.x,
(ftmp.y/cSim.periodicBoxSizeY+1.0f)*cSim.pmeGridSize.y, (posq.y/cSim.periodicBoxSizeY+1.0f)*cSim.pmeGridSize.y,
(ftmp.z/cSim.periodicBoxSizeZ+1.0f)*cSim.pmeGridSize.z); (posq.z/cSim.periodicBoxSizeZ+1.0f)*cSim.pmeGridSize.z);
float3 tix; int3 gridIndex = make_int3(((int) t.x) % cSim.pmeGridSize.x,
ftmp.x = modff(t.x, &tix.x); ((int) t.y) % cSim.pmeGridSize.y,
ftmp.y = modff(t.y, &tix.y); ((int) t.z) % cSim.pmeGridSize.z);
ftmp.z = modff(t.z, &tix.z); cSim.pPmeAtomGridIndex[i] = make_float2(i, gridIndex.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z+gridIndex.y*cSim.pmeGridSize.z+gridIndex.z);
cSim.pPmeParticleFraction[i] = ftmp;
int4 itmp = make_int4(fast_mod(__float2int_rd(tix.x), cSim.pmeGridSize.x),
fast_mod(__float2int_rd(tix.y), cSim.pmeGridSize.y),
fast_mod(__float2int_rd(tix.z), cSim.pmeGridSize.z), 0);
cSim.pPmeParticleIndex[i] = itmp;
cSim.pPmeAtomGridIndex[i] = make_float2(i, itmp.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z+itmp.y*cSim.pmeGridSize.z+itmp.z);
} }
} }
...@@ -173,7 +167,11 @@ __global__ void kUpdateBsplines_kernel() ...@@ -173,7 +167,11 @@ __global__ void kUpdateBsplines_kernel()
ddata[j] = make_float4(0.0f); ddata[j] = make_float4(0.0f);
} }
float4 dr = cSim.pPmeParticleFraction[i]; float4 posq = cSim.pPosq[tid];
float3 t = make_float3((posq.x/cSim.periodicBoxSizeX+1.0f)*cSim.pmeGridSize.x,
(posq.y/cSim.periodicBoxSizeY+1.0f)*cSim.pmeGridSize.y,
(posq.z/cSim.periodicBoxSizeZ+1.0f)*cSim.pmeGridSize.z);
float4 dr = make_float4(t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f);
data[PME_ORDER - 1] = make_float4(0.0f); data[PME_ORDER - 1] = make_float4(0.0f);
data[1] = dr; data[1] = dr;
...@@ -295,7 +293,12 @@ __global__ void kGridInterpolateForce_kernel() ...@@ -295,7 +293,12 @@ __global__ void kGridInterpolateForce_kernel()
{ {
float3 force = make_float3(0.0f, 0.0f, 0.0f); float3 force = make_float3(0.0f, 0.0f, 0.0f);
float4 posq = cSim.pPosq[atom]; float4 posq = cSim.pPosq[atom];
int4 gridIndex = cSim.pPmeParticleIndex[atom]; float3 t = make_float3((posq.x/cSim.periodicBoxSizeX+1.0f)*cSim.pmeGridSize.x,
(posq.y/cSim.periodicBoxSizeY+1.0f)*cSim.pmeGridSize.y,
(posq.z/cSim.periodicBoxSizeZ+1.0f)*cSim.pmeGridSize.z);
int3 gridIndex = make_int3(((int) t.x) % cSim.pmeGridSize.x,
((int) t.y) % cSim.pmeGridSize.y,
((int) t.z) % cSim.pmeGridSize.z);
for (int ix = 0; ix < PME_ORDER; ix++) for (int ix = 0; ix < PME_ORDER; ix++)
{ {
int xindex = (gridIndex.x + ix) % cSim.pmeGridSize.x; int xindex = (gridIndex.x + ix) % cSim.pmeGridSize.x;
...@@ -347,6 +350,6 @@ void kCalculatePME(gpuContext gpu) ...@@ -347,6 +350,6 @@ void kCalculatePME(gpuContext gpu)
kReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>(); kReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kReciprocalConvolution"); LAUNCHERROR("kReciprocalConvolution");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_INVERSE); cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_INVERSE);
kGridInterpolateForce_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>(); kGridInterpolateForce_kernel<<<2*gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kGridInterpolateForce"); LAUNCHERROR("kGridInterpolateForce");
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment