"devtools/packaging/scripts/source/build.sh" did not exist on "4fcf2b166241b8cb6defb65b12cf68c813c7ab13"
Commit 9e74c183 authored by peastman's avatar peastman
Browse files

CPU FFT selects grid dimensions that can be handled efficiently. Also lots of minor cleanup.

parent f2548616
...@@ -644,7 +644,8 @@ public: ...@@ -644,7 +644,8 @@ public:
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include * @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy. */ * @return an optional contribution to add to the potential energy.
*/
virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0; virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
}; };
......
...@@ -280,4 +280,4 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_ ...@@ -280,4 +280,4 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_
forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.y*0x100000000)); forceBuffers[atom+PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.y*0x100000000));
forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.z*0x100000000)); forceBuffers[atom+2*PADDED_NUM_ATOMS] += static_cast<unsigned long long>((long long) (f.z*0x100000000));
} }
} }
\ No newline at end of file
...@@ -750,7 +750,8 @@ public: ...@@ -750,7 +750,8 @@ public:
* @param includeForce true if forces should be computed * @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed * @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include * @param groups a set of bit flags for which force groups to include
* @return an optional contribution to add to the potential energy. */ * @return an optional contribution to add to the potential energy.
*/
virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0; virtual double computeForceAndEnergy(bool includeForces, bool includeEnergy, int groups) = 0;
}; };
......
...@@ -397,15 +397,15 @@ static void* threadBody(void* args) { ...@@ -397,15 +397,15 @@ static void* threadBody(void* args) {
return 0; return 0;
} }
void CpuCalcPmeReciprocalForceKernel::initialize(int gridx, int gridy, int gridz, int numParticles, double alpha) { void CpuCalcPmeReciprocalForceKernel::initialize(int xsize, int ysize, int zsize, int numParticles, double alpha) {
if (!hasInitializedThreads) { if (!hasInitializedThreads) {
numThreads = getNumProcessors(); numThreads = getNumProcessors();
fftwf_init_threads(); fftwf_init_threads();
hasInitializedThreads = true; hasInitializedThreads = true;
} }
this->gridx = gridx; gridx = findFFTDimension(xsize);
this->gridy = gridy; gridy = findFFTDimension(ysize);
this->gridz = gridz; gridz = findFFTDimension(zsize);
this->numParticles = numParticles; this->numParticles = numParticles;
this->alpha = alpha; this->alpha = alpha;
force.resize(4*numParticles); force.resize(4*numParticles);
...@@ -630,3 +630,20 @@ bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() { ...@@ -630,3 +630,20 @@ bool CpuCalcPmeReciprocalForceKernel::isProcessorSupported() {
} }
return false; return false;
} }
int CpuCalcPmeReciprocalForceKernel::findFFTDimension(int minimum) {
if (minimum < 1)
return 1;
while (true) {
// Attempt to factor the current value.
int unfactored = minimum;
for (int factor = 2; factor < 12; factor++) {
while (unfactored > 1 && unfactored%factor == 0)
unfactored /= factor;
}
if (unfactored == 1)
return minimum;
minimum++;
}
}
...@@ -42,6 +42,8 @@ ...@@ -42,6 +42,8 @@
namespace OpenMM { namespace OpenMM {
/** /**
* This is an optimized CPU implementation of CalcPmeReciprocalForceKernel. It is both
* vectorized (requiring SSE 4.1) and multithreaded. It uses FFTW to perform the FFTs.
*/ */
class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel { class OPENMM_EXPORT_PME CpuCalcPmeReciprocalForceKernel : public CalcPmeReciprocalForceKernel {
...@@ -50,15 +52,53 @@ public: ...@@ -50,15 +52,53 @@ public:
CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform), CpuCalcPmeReciprocalForceKernel(std::string name, const Platform& platform) : CalcPmeReciprocalForceKernel(name, platform),
hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) { hasCreatedPlan(false), isDeleted(false), realGrid(NULL), complexGrid(NULL) {
} }
void initialize(int gridx, int gridy, int gridz, int numParticles, double alpha); /**
* Initialize the kernel.
*
* @param gridx the x size of the PME grid
* @param gridy the y size of the PME grid
* @param gridz the z size of the PME grid
* @param numParticles the number of particles in the system
* @param alpha the Ewald blending parameter
*/
void initialize(int xsize, int ysize, int zsize, int numParticles, double alpha);
~CpuCalcPmeReciprocalForceKernel(); ~CpuCalcPmeReciprocalForceKernel();
/**
* Begin computing the force and energy.
*
* @param io an object that coordinates data transfer
* @param periodicBoxSize the size of the periodic box (measured in nm)
* @param includeEnergy true if potential energy should be computed
*/
void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy); void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
/**
* Finish computing the force and energy.
*
* @param io an object that coordinates data transfer
* @return the potential energy due to the PME reciprocal space interactions
*/
double finishComputation(IO& io); double finishComputation(IO& io);
/**
* This routine contains the code executed by each thread.
*/
void runThread(int index); void runThread(int index);
/**
* Get whether the current CPU supports all features needed by this kernel.
*/
static bool isProcessorSupported(); static bool isProcessorSupported();
private: private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void threadWait(); void threadWait();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void advanceThreads(); void advanceThreads();
/**
* Select a size for one grid dimension that FFTW can handle efficiently.
*/
int findFFTDimension(int minimum);
static bool hasInitializedThreads; static bool hasInitializedThreads;
static int numThreads; static int numThreads;
int gridx, gridy, gridz, numParticles; int gridx, gridy, gridz, numParticles;
......
...@@ -83,6 +83,7 @@ void testPME() { ...@@ -83,6 +83,7 @@ void testPME() {
force->setNonbondedMethod(NonbondedForce::PME); force->setNonbondedMethod(NonbondedForce::PME);
force->setCutoffDistance(cutoff); force->setCutoffDistance(cutoff);
force->setReciprocalSpaceForceGroup(1); force->setReciprocalSpaceForceGroup(1);
force->setEwaldErrorTolerance(1e-4);
// Compute the reciprocal space forces with the reference platform. // Compute the reciprocal space forces with the reference platform.
...@@ -116,9 +117,9 @@ void testPME() { ...@@ -116,9 +117,9 @@ void testPME() {
// See if they match. // See if they match.
ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-4); ASSERT_EQUAL_TOL(refState.getPotentialEnergy(), energy+ewaldSelfEnergy, 1e-3);
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-4); ASSERT_EQUAL_VEC(refState.getForces()[i], Vec3(io.force[4*i], io.force[4*i+1], io.force[4*i+2]), 1e-3);
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment