Commit 55801d55 authored by Marc Marí's avatar Marc Marí
Browse files

Use coarser granularity to split work in PME kernels

parent 7c7dc751
......@@ -66,13 +66,17 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
float posInBox[4] = {0,0,0,0};
memset(grid, 0, sizeof(float)*gridx*gridy*gridz);
int i = threadIndex;
const int groupSize = max(1, numParticles / (10 * numThreads));
int start = groupSize * threadIndex;
while (true) {
if (!deterministic)
i = atomicCounter++;
if (i >= numParticles)
start = atomicCounter.fetch_add(groupSize);
if (start >= numParticles)
break;
int end = min(start + groupSize, numParticles);
for (int i = start; i < end; ++i) {
// Find the position relative to the nearest grid point.
fvec4 pos(&posq[4*i]);
......@@ -154,8 +158,10 @@ static void spreadCharge(float* posq, float* grid, int gridx, int gridy, int gri
}
}
}
}
if (deterministic)
i += numThreads;
start += groupSize * numThreads;
}
}
......@@ -310,7 +316,7 @@ static void reciprocalConvolution(int start, int end, fftwf_complex* grid, vecto
}
}
static void interpolateForces(float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, atomic<int>& atomicCounter, const float epsilonFactor) {
static void interpolateForces(float* posq, float* force, float* grid, int gridx, int gridy, int gridz, int numParticles, Vec3* periodicBoxVectors, Vec3* recipBoxVectors, atomic<int>& atomicCounter, const float epsilonFactor, int numThreads) {
fvec4 boxSize((float) periodicBoxVectors[0][0], (float) periodicBoxVectors[1][1], (float) periodicBoxVectors[2][2], 0);
fvec4 invBoxSize((float) recipBoxVectors[0][0], (float) recipBoxVectors[1][1], (float) recipBoxVectors[2][2], 0);
fvec4 recipBoxVec0((float) recipBoxVectors[0][0], (float) recipBoxVectors[0][1], (float) recipBoxVectors[0][2], 0);
......@@ -320,11 +326,16 @@ static void interpolateForces(float* posq, float* force, float* grid, int gridx,
ivec4 gridSizeInt(gridx, gridy, gridz, 0);
fvec4 one(1);
fvec4 scale(1.0f/(PME_ORDER-1));
const int groupSize = max(1, numParticles / (10 * numThreads));
while (true) {
int i = atomicCounter++;
if (i >= numParticles)
int start = atomicCounter.fetch_add(groupSize);
if (start >= numParticles)
break;
int end = min(start + groupSize, numParticles);
for (int i = start; i < end; i++) {
// Find the position relative to the nearest grid point.
fvec4 pos(&posq[4*i]);
......@@ -403,6 +414,7 @@ static void interpolateForces(float* posq, float* force, float* grid, int gridx,
force[4*i+1] = fc[0]*gridx*(float)recipBoxVectors[1][0]+fc[1]*gridy*(float)recipBoxVectors[1][1];
force[4*i+2] = fc[0]*gridx*(float)recipBoxVectors[2][0]+fc[1]*gridy*(float)recipBoxVectors[2][1]+fc[2]*gridz*(float)recipBoxVectors[2][2];
}
}
}
static void* threadBody(void* args) {
......@@ -606,7 +618,7 @@ void CpuCalcPmeReciprocalForceKernel::runWorkerThread(ThreadPool& threads, int i
}
reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm);
threads.syncThreads();
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor);
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, numThreads);
}
void CpuCalcPmeReciprocalForceKernel::beginComputation(IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) {
......@@ -900,7 +912,7 @@ void CpuCalcDispersionPmeReciprocalForceKernel::runWorkerThread(ThreadPool& thre
complexStart = (index*complexSize)/numThreads;
reciprocalConvolution(complexStart, complexEnd, complexGrid, recipEterm);
threads.syncThreads();
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor);
interpolateForces(posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxVectors, recipBoxVectors, atomicCounter, epsilonFactor, numThreads);
}
void CpuCalcDispersionPmeReciprocalForceKernel::beginComputation(CalcPmeReciprocalForceKernel::IO& io, const Vec3* periodicBoxVectors, bool includeEnergy) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment