"test/develop/dev.py" did not exist on "e2df774faa054c67fa15d35aab6683c5a5d89c5c"
Commit 823ad165 authored by peastman's avatar peastman
Browse files

Continuing to implement CPU based PME

parent 684ba725
...@@ -220,7 +220,7 @@ static float reciprocalEnergy(int start, int end, fftwf_complex* grid, int gridx ...@@ -220,7 +220,7 @@ static float reciprocalEnergy(int start, int end, fftwf_complex* grid, int gridx
firstz = 0; firstz = 0;
} }
} }
return energy; return 0.5f*energy;
} }
static void reciprocalConvolution(int start, int end, fftwf_complex* grid, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3 periodicBoxSize) { static void reciprocalConvolution(int start, int end, fftwf_complex* grid, int gridx, int gridy, int gridz, double alpha, vector<float>* bsplineModuli, Vec3 periodicBoxSize) {
...@@ -348,12 +348,14 @@ public: ...@@ -348,12 +348,14 @@ public:
static void* threadBody(void* args) { static void* threadBody(void* args) {
CpuPme::ThreadData& data = *reinterpret_cast<CpuPme::ThreadData*>(args); CpuPme::ThreadData& data = *reinterpret_cast<CpuPme::ThreadData*>(args);
data.owner.runThread(data.index); data.owner.runThread(data.index);
if (data.tempGrid != NULL)
fftwf_free(data.tempGrid);
delete &data; delete &data;
return 0; return 0;
} }
CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) : CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) :
gridx(gridx), gridy(gridy), gridz(gridz), numParticles(numParticles), alpha(alpha), hasCreatedPlan(false), isFinished(false), realGrid(NULL), complexGrid(NULL) { gridx(gridx), gridy(gridy), gridz(gridz), numParticles(numParticles), alpha(alpha), hasCreatedPlan(false), isDeleted(false), force(4*numParticles), realGrid(NULL), complexGrid(NULL) {
if (!hasInitializedThreads) { if (!hasInitializedThreads) {
numThreads = getNumProcessors(); numThreads = getNumProcessors();
fftwf_init_threads(); fftwf_init_threads();
...@@ -363,6 +365,8 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) ...@@ -363,6 +365,8 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha)
pthread_cond_init(&startCondition, NULL); pthread_cond_init(&startCondition, NULL);
pthread_cond_init(&endCondition, NULL); pthread_cond_init(&endCondition, NULL);
pthread_cond_init(&mainThreadStartCondition, NULL);
pthread_cond_init(&mainThreadEndCondition, NULL);
pthread_mutex_init(&lock, NULL); pthread_mutex_init(&lock, NULL);
thread.resize(numThreads); thread.resize(numThreads);
for (int i = 0; i < numThreads; i++) { for (int i = 0; i < numThreads; i++) {
...@@ -371,7 +375,8 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha) ...@@ -371,7 +375,8 @@ CpuPme::CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha)
pthread_create(&thread[i], NULL, threadBody, data); pthread_create(&thread[i], NULL, threadBody, data);
data->tempGrid = (float*) fftwf_malloc(sizeof(float)*(gridx*gridy*gridz+3)); data->tempGrid = (float*) fftwf_malloc(sizeof(float)*(gridx*gridy*gridz+3));
} }
pthread_create(&mainThread, NULL, threadBody, new ThreadData(*this, -1));
// Initialize FFTW. // Initialize FFTW.
realGrid = threadData[0]->tempGrid; realGrid = threadData[0]->tempGrid;
...@@ -444,7 +449,8 @@ CpuPme::~CpuPme() { ...@@ -444,7 +449,8 @@ CpuPme::~CpuPme() {
fftwf_destroy_plan(forwardFFT); fftwf_destroy_plan(forwardFFT);
fftwf_destroy_plan(backwardFFT); fftwf_destroy_plan(backwardFFT);
} }
isFinished = true; isDeleted = true;
pthread_mutex_lock(&lock);
pthread_cond_broadcast(&startCondition); pthread_cond_broadcast(&startCondition);
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
for (int i = 0; i < (int) thread.size(); i++) for (int i = 0; i < (int) thread.size(); i++)
...@@ -452,11 +458,8 @@ CpuPme::~CpuPme() { ...@@ -452,11 +458,8 @@ CpuPme::~CpuPme() {
pthread_mutex_destroy(&lock); pthread_mutex_destroy(&lock);
pthread_cond_destroy(&startCondition); pthread_cond_destroy(&startCondition);
pthread_cond_destroy(&endCondition); pthread_cond_destroy(&endCondition);
for (int i = 0; i < (int) threadData.size(); i++) { pthread_cond_destroy(&mainThreadStartCondition);
if (threadData[i]->tempGrid != NULL) pthread_cond_destroy(&mainThreadEndCondition);
fftwf_free(threadData[i]->tempGrid);
delete threadData[i];
}
} }
#include <sys/time.h> #include <sys/time.h>
...@@ -465,37 +468,76 @@ double diff(struct timeval t1, struct timeval t2) { ...@@ -465,37 +468,76 @@ double diff(struct timeval t1, struct timeval t2) {
} }
void CpuPme::runThread(int index) { void CpuPme::runThread(int index) {
int particleStart = (index*numParticles)/numThreads; if (index == -1) {
int particleEnd = ((index+1)*numParticles)/numThreads; // This is the main thread that coordinates all the other ones.
int gridxStart = (index*gridx)/numThreads;
int gridxEnd = ((index+1)*gridx)/numThreads; while (true) {
int gridSize = (gridx*gridy*gridz+3)/4; // Wait for the signal to start.
int gridStart = 4*((index*gridSize)/numThreads);
int gridEnd = 4*(((index+1)*gridSize)/numThreads); pthread_mutex_lock(&lock);
while (!isFinished) { pthread_cond_wait(&mainThreadStartCondition, &lock);
threadWait(); pthread_mutex_unlock(&lock);
spreadCharge(particleStart, particleEnd, posq, threadData[index]->tempGrid, gridx, gridy, gridz, numParticles, periodicBoxSize); if (isDeleted)
threadWait(); return;
int numGrids = threadData.size(); isFinished = false;
for (int i = gridStart; i < gridEnd; i += 4) { struct timeval t1, t2, t3, t4, t5, t6, t7;
__m128 sum = _mm_load_ps(&realGrid[i]); gettimeofday(&t1, NULL);
for (int j = 1; j < numGrids; j++) advanceThreads(); // Signal threads to perform charge spreading.
sum = _mm_add_ps(sum, _mm_load_ps(&threadData[j]->tempGrid[i])); advanceThreads(); // Signal threads to sum the charge grids.
_mm_store_ps(&realGrid[i], sum); gettimeofday(&t2, NULL);
} fftwf_execute_dft_r2c(forwardFFT, realGrid, complexGrid);
threadWait(); gettimeofday(&t3, NULL);
if (isFinished) if (includeEnergy)
break; advanceThreads(); // Signal threads to compute energy.
if (includeEnergy) { gettimeofday(&t4, NULL);
double threadEnergy = reciprocalEnergy(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, alpha, bsplineModuli, periodicBoxSize); advanceThreads(); // Signal threads to perform reciprocal convolution.
gettimeofday(&t5, NULL);
fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid);
gettimeofday(&t6, NULL);
advanceThreads(); // Signal threads to interpolate forces.
isFinished = true;
gettimeofday(&t7, NULL);
printf("time %g %g %g %g %g %g\n", diff(t1, t2), diff(t2, t3), diff(t3, t4), diff(t4, t5), diff(t5, t6), diff(t6, t7));
pthread_mutex_lock(&lock); pthread_mutex_lock(&lock);
energy += threadEnergy; pthread_cond_signal(&mainThreadEndCondition);
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
}
}
else {
// This is a worker thread.
int particleStart = (index*numParticles)/numThreads;
int particleEnd = ((index+1)*numParticles)/numThreads;
int gridxStart = (index*gridx)/numThreads;
int gridxEnd = ((index+1)*gridx)/numThreads;
int gridSize = (gridx*gridy*gridz+3)/4;
int gridStart = 4*((index*gridSize)/numThreads);
int gridEnd = 4*(((index+1)*gridSize)/numThreads);
while (true) {
threadWait(); threadWait();
if (isDeleted)
return;
spreadCharge(particleStart, particleEnd, posq, threadData[index]->tempGrid, gridx, gridy, gridz, numParticles, periodicBoxSize);
threadWait();
int numGrids = threadData.size();
for (int i = gridStart; i < gridEnd; i += 4) {
__m128 sum = _mm_load_ps(&realGrid[i]);
for (int j = 1; j < numGrids; j++)
sum = _mm_add_ps(sum, _mm_load_ps(&threadData[j]->tempGrid[i]));
_mm_store_ps(&realGrid[i], sum);
}
threadWait();
if (includeEnergy) {
double threadEnergy = reciprocalEnergy(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, alpha, bsplineModuli, periodicBoxSize);
pthread_mutex_lock(&lock);
energy += threadEnergy;
pthread_mutex_unlock(&lock);
threadWait();
}
reciprocalConvolution(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, alpha, bsplineModuli, periodicBoxSize);
threadWait();
interpolateForces(particleStart, particleEnd, posq, &force[0], realGrid, gridx, gridy, gridz, numParticles, periodicBoxSize);
} }
reciprocalConvolution(gridxStart, gridxEnd, complexGrid, gridx, gridy, gridz, alpha, bsplineModuli, periodicBoxSize);
threadWait();
interpolateForces(particleStart, particleEnd, posq, force, realGrid, gridx, gridy, gridz, numParticles, periodicBoxSize);
} }
} }
...@@ -517,29 +559,22 @@ void CpuPme::advanceThreads() { ...@@ -517,29 +559,22 @@ void CpuPme::advanceThreads() {
pthread_mutex_unlock(&lock); pthread_mutex_unlock(&lock);
} }
double CpuPme::computeForceAndEnergy(float* posq, float* force, Vec3 periodicBoxSize, bool includeEnergy) { void CpuPme::beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy) {
this->posq = posq; posq = io.getPosq();
this->force = force;
this->periodicBoxSize = periodicBoxSize; this->periodicBoxSize = periodicBoxSize;
this->includeEnergy = includeEnergy; this->includeEnergy = includeEnergy;
energy = 0.0; energy = 0.0;
struct timeval t1, t2, t3, t4, t5, t6, t7; pthread_mutex_lock(&lock);
gettimeofday(&t1, NULL); pthread_cond_signal(&mainThreadStartCondition);
advanceThreads(); // Signal threads to perform charge spreading. pthread_mutex_unlock(&lock);
advanceThreads(); // Signal threads to sum the charge grids. }
gettimeofday(&t2, NULL);
fftwf_execute_dft_r2c(forwardFFT, realGrid, complexGrid); double CpuPme::finishComputation(IO& io) {
gettimeofday(&t3, NULL); pthread_mutex_lock(&lock);
double energy = 0.0; while (!isFinished) {
if (includeEnergy) pthread_cond_wait(&mainThreadEndCondition, &lock);
advanceThreads(); // Signal threads to compute energy. }
gettimeofday(&t4, NULL); pthread_mutex_unlock(&lock);
advanceThreads(); // Signal threads to perform reciprocal convolution. io.setForce(&force[0]);
gettimeofday(&t5, NULL);
fftwf_execute_dft_c2r(backwardFFT, complexGrid, realGrid);
gettimeofday(&t6, NULL);
advanceThreads(); // Signal threads to interpolate forces.
gettimeofday(&t7, NULL);
printf("time %g %g %g %g %g %g\n", diff(t1, t2), diff(t2, t3), diff(t3, t4), diff(t4, t5), diff(t5, t6), diff(t6, t7));
return energy; return energy;
} }
...@@ -34,7 +34,6 @@ ...@@ -34,7 +34,6 @@
#include "windowsExportCuda.h" #include "windowsExportCuda.h"
#include "openmm/Vec3.h" #include "openmm/Vec3.h"
#include <complex>
#include <fftw3.h> #include <fftw3.h>
#include <pthread.h> #include <pthread.h>
#include <vector> #include <vector>
...@@ -46,12 +45,14 @@ namespace OpenMM { ...@@ -46,12 +45,14 @@ namespace OpenMM {
class OPENMM_EXPORT_CUDA CpuPme { class OPENMM_EXPORT_CUDA CpuPme {
public: public:
class IO;
class ThreadData; class ThreadData;
/** /**
*/ */
CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha); CpuPme(int gridx, int gridy, int gridz, int numParticles, double alpha);
~CpuPme(); ~CpuPme();
double computeForceAndEnergy(float* posq, float* force, Vec3 periodicBoxSize, bool includeEnergy); void beginComputation(IO& io, Vec3 periodicBoxSize, bool includeEnergy);
double finishComputation(IO& io);
void runThread(int index); void runThread(int index);
private: private:
void threadWait(); void threadWait();
...@@ -60,24 +61,32 @@ private: ...@@ -60,24 +61,32 @@ private:
static int numThreads; static int numThreads;
int gridx, gridy, gridz, numParticles; int gridx, gridy, gridz, numParticles;
double alpha; double alpha;
bool hasCreatedPlan, isFinished; bool hasCreatedPlan, isFinished, isDeleted;
std::vector<float> force;
std::vector<float> bsplineModuli[3]; std::vector<float> bsplineModuli[3];
float* realGrid; float* realGrid;
fftwf_complex* complexGrid; fftwf_complex* complexGrid;
fftwf_plan forwardFFT, backwardFFT; fftwf_plan forwardFFT, backwardFFT;
int waitCount; int waitCount;
pthread_cond_t startCondition, endCondition; pthread_cond_t startCondition, endCondition;
pthread_cond_t mainThreadStartCondition, mainThreadEndCondition;
pthread_mutex_t lock; pthread_mutex_t lock;
pthread_t mainThread;
std::vector<pthread_t> thread; std::vector<pthread_t> thread;
std::vector<ThreadData*> threadData; std::vector<ThreadData*> threadData;
// The following variables are used to store information about the calculation currently being performed. // The following variables are used to store information about the calculation currently being performed.
float energy; float energy;
float* posq; float* posq;
float* force;
Vec3 periodicBoxSize; Vec3 periodicBoxSize;
bool includeEnergy; bool includeEnergy;
}; };
class CpuPme::IO {
public:
virtual float* getPosq() = 0;
virtual void setForce(float* force) = 0;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_CPU_PME_H_*/ #endif /*OPENMM_CPU_PME_H_*/
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment