Commit 9e74c183 authored by peastman's avatar peastman
Browse files

CPU FFT selects grid dimensions that can be handled efficiently. Also lots of minor cleanup.

parent f2548616
......@@ -644,7 +644,8 @@ public:
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy. */
* @return an optional contribution to add to the potential energy.
*/
virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
};
......
......@@ -280,4 +280,4 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_
forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.z*0x100000000));
}
}
\ No newline at end of file
}
......@@ -750,7 +750,8 @@ public:
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy. */
* @return an optional contribution to add to the potential energy.
*/
virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
};
......
......@@ -397,15 +397,15 @@ static void* threadBody(void* args) {
return 0;
}
void CpuCalcPmeReciprocalForceKernel::initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) {
void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
if (!hasInitializedThreads) {
numThreads = getNumProcessors();
fftwf_init_threads();
hasInitializedThreads = true;
}
this->gridx = gridx;
this->gridy = gridy;
this->gridz = gridz;
gridx = findFFTDimension(xsize);
gridy = findFFTDimension(ysize);
gridz = findFFTDimension(zsize);
this->numParticles = numParticles;
this->alpha = alpha;
force.resize(4*numParticles);
......@@ -630,3 +630,20 @@ bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() {
}
return false;
}
int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum) {
if (minimum < 1)
return 1;
while (true) {
// Attempt to factor the current value.
int unfactored = minimum;
for (int factor = 2; factor < 12; factor++) {
while (unfactored > 1 && unfactored%factor == 0)
unfactored /= factor;
}
if (unfactored == 1)
return minimum;
minimum++;
}
}
......@@ -42,6 +42,8 @@
namespace OpenMM {
/**
* This is an optimized CPU implementation of CalcPmeReciprocalForceKernel. It is both
* vectorized (requiring SSE 4.1) and multithreaded. It uses FFTW to perform the FFTs.
*/
class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
......@@ -50,15 +52,53 @@ public:
CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) {
}
void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha);
/**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
~CpuCalcPmeReciprocalForceKernel();
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
double finishComputation(IO& io);
/**
* This routine contains the code executed by each thread.
*/
void runThread(int index);
/**
* Get whether the current CPU supports all features needed by this kernel.
*/
static bool isProcessorSupported();
private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void threadWait();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void advanceThreads();
/**
* Select a size for one grid dimension that FFTW can handle efficiently.
*/
int findFFTDimension(int minimum);
static bool hasInitializedThreads;
static int numThreads;
int gridx, gridy, gridz, numParticles;
......
......@@ -83,6 +83,7 @@ void testPME() {
force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(cutoff);
force->setReciprocalSpaceForceGroup(1);
force->setEwaldErrorTolerance(1e-4);
// Compute the reciprocal space forces with the reference platform.
......@@ -116,9 +117,9 @@ void testPME() {
// See if they match.
ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-4);
ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-3);
for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-4);
ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-3);
}
int main(int argc, char* argv[]) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment