Commit cd874b2b authored by peastman
Browse files

Merged changes from main branch

parents a783b996 b84e22ba
/* Portions copyright (c) 2009-2016 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -122,7 +122,6 @@ class CpuCustomNonbondedForce { ...@@ -122,7 +122,6 @@ class CpuCustomNonbondedForce {
double* fixedParameters, const std::map<std::string, double>& globalParameters, double* fixedParameters, const std::map<std::string, double>& globalParameters,
std::vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs); std::vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
private: private:
class ComputeForceTask;
class ThreadData; class ThreadData;
bool cutoff; bool cutoff;
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -36,7 +36,6 @@ namespace OpenMM { ...@@ -36,7 +36,6 @@ namespace OpenMM {
class CpuGBSAOBCForce { class CpuGBSAOBCForce {
public: public:
class ComputeTask;
CpuGBSAOBCForce(); CpuGBSAOBCForce();
/** /**
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2016 Stanford University and the Authors. * * Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,7 +45,6 @@ namespace OpenMM { ...@@ -45,7 +45,6 @@ namespace OpenMM {
class CpuGayBerneForce { class CpuGayBerneForce {
public: public:
struct Matrix; struct Matrix;
class ComputeTask;
/** /**
* Constructor. * Constructor.
......
...@@ -54,8 +54,6 @@ namespace OpenMM { ...@@ -54,8 +54,6 @@ namespace OpenMM {
*/ */
class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel { class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public: public:
class InitForceTask;
class SumForceTask;
CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context); CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context);
/** /**
* Initialize the kernel. * Initialize the kernel.
...@@ -251,27 +249,37 @@ public: ...@@ -251,27 +249,37 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force); void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
/** /**
* Get the parameters being used for PME. * Get the parameters being used for PME.
* *
* @param alpha the separation parameter * @param alpha the separation parameter
* @param nx the number of grid points along the X axis * @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis * @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis * @param nz the number of grid points along the Z axis
*/ */
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const; void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private: private:
class PmeIO; class PmeIO;
CpuPlatform::PlatformData& data; CpuPlatform::PlatformData& data;
int numParticles, num14; int numParticles, num14;
int **bonded14IndexArray; int **bonded14IndexArray;
double **bonded14ParamArray; double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldSelfEnergy, dispersionCoefficient; double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3]; int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme; bool useSwitchingFunction, useOptimizedPme, hasInitializedPme, hasInitializedDispersionPme;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded; CpuNonbondedForce* nonbonded;
Kernel optimizedPme; Kernel optimizedPme, optimizedDispersionPme;
CpuBondForce bondForce; CpuBondForce bondForce;
}; };
......
/* Portions copyright (c) 2013-2016 Stanford University and Simbios. /* Portions copyright (c) 2013-2017 Stanford University and Simbios.
* Authors: Peter Eastman * Authors: Peter Eastman
* Contributors: * Contributors:
* *
...@@ -35,9 +35,6 @@ namespace OpenMM { ...@@ -35,9 +35,6 @@ namespace OpenMM {
class CpuLangevinDynamics : public ReferenceStochasticDynamics { class CpuLangevinDynamics : public ReferenceStochasticDynamics {
public: public:
class Update1Task;
class Update2Task;
class Update3Task;
/** /**
* Constructor. * Constructor.
* *
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,7 +45,6 @@ namespace OpenMM { ...@@ -45,7 +45,6 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuNeighborList { class OPENMM_EXPORT_CPU CpuNeighborList {
public: public:
class ThreadTask;
class Voxels; class Voxels;
CpuNeighborList(int blockSize); CpuNeighborList(int blockSize);
void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions, void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -39,7 +39,6 @@ namespace OpenMM { ...@@ -39,7 +39,6 @@ namespace OpenMM {
class CpuNonbondedForce { class CpuNonbondedForce {
public: public:
class ComputeDirectTask;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -104,16 +103,27 @@ class CpuNonbondedForce { ...@@ -104,16 +103,27 @@ class CpuNonbondedForce {
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter @param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh @param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void setUsePME(float alpha, int meshSize[3]); void setUsePME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param meshSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(float alpha, int meshSize[3]);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Calculate Ewald ixn Calculate Ewald ixn
...@@ -122,16 +132,17 @@ class CpuNonbondedForce { ...@@ -122,16 +132,17 @@ class CpuNonbondedForce {
@param posq atom coordinates and charges @param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME) @param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon)) @param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param C6params C6 parameters for multiplicative representation of dispersion
@param exclusions atom exclusion indices @param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added) @param forces force array (forces added)
@param totalEnergy total energy @param totalEnergy total energy
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<Vec3>& atomCoordinates, void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<Vec3>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<float> &C6params,
std::vector<Vec3>& forces, double* totalEnergy) const; const std::vector<std::set<int> >& exclusions, std::vector<Vec3>& forces, double* totalEnergy) const;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -150,7 +161,7 @@ class CpuNonbondedForce { ...@@ -150,7 +161,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<Vec3>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters, void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<Vec3>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads); const std::vector<float>& C6params, const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
...@@ -163,28 +174,32 @@ protected: ...@@ -163,28 +174,32 @@ protected:
bool periodic; bool periodic;
bool triclinic; bool triclinic;
bool ewald; bool ewald;
bool pme; bool ljpme, pme;
bool tableIsValid; bool tableIsValid, expTableIsValid;
const CpuNeighborList* neighborList; const CpuNeighborList* neighborList;
float recipBoxSize[3]; float recipBoxSize[3];
Vec3 periodicBoxVectors[3]; Vec3 periodicBoxVectors[3];
AlignedArray<fvec4> periodicBoxVec4; AlignedArray<fvec4> periodicBoxVec4;
float cutoffDistance, switchingDistance; float cutoffDistance, switchingDistance;
float krf, crf; float krf, crf;
float alphaEwald; float alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz; int numRx, numRy, numRz;
int meshDim[3]; int meshDim[3], dispersionMeshDim[3];
std::vector<float> erfcTable, ewaldScaleTable; std::vector<float> erfcTable, ewaldScaleTable;
float ewaldDX, ewaldDXInv, erfcDXInv; std::vector<float> exptermsTable, dExptermsTable;
float ewaldDX, ewaldDXInv, erfcDXInv, exptermsDX, exptermsDXInv;
std::vector<double> threadEnergy; std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
float* posq; float* posq;
Vec3 const* atomCoordinates; Vec3 const* atomCoordinates;
std::pair<float, float> const* atomParameters; std::pair<float, float> const* atomParameters;
float const *C6params;
std::set<int> const* exclusions; std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce; std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy; bool includeEnergy;
float inverseRcut6;
float inverseRcut6Expterm;
void* atomicCounter; void* atomicCounter;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
...@@ -238,10 +253,29 @@ protected: ...@@ -238,10 +253,29 @@ protected:
*/ */
void tabulateEwaldScaleFactor(); void tabulateEwaldScaleFactor();
/**
* Create a lookup table for the scale factor used with dispersion PME.
*/
void tabulateExpTerms();
/** /**
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
float erfcApprox(float x); float erfcApprox(float x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
float exptermsApprox(float R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
float dExptermsApprox(float R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -88,11 +88,25 @@ protected: ...@@ -88,11 +88,25 @@ protected:
* Compute a fast approximation to erfc(x). * Compute a fast approximation to erfc(x).
*/ */
fvec4 erfcApprox(const fvec4& x); fvec4 erfcApprox(const fvec4& x);
/** /**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/ */
fvec4 ewaldScaleFunction(const fvec4& x); fvec4 ewaldScaleFunction(const fvec4& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec4 exptermsApprox(const fvec4& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec4 dExptermsApprox(const fvec4& R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -92,6 +92,21 @@ protected: ...@@ -92,6 +92,21 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI) * Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/ */
fvec8 ewaldScaleFunction(const fvec8& x); fvec8 ewaldScaleFunction(const fvec8& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec8 exptermsApprox(const fvec8& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec8 dExptermsApprox(const fvec8& R);
}; };
} // namespace OpenMM } // namespace OpenMM
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -45,8 +45,6 @@ namespace OpenMM { ...@@ -45,8 +45,6 @@ namespace OpenMM {
*/ */
class OPENMM_EXPORT_CPU CpuSETTLE : public ReferenceConstraintAlgorithm { class OPENMM_EXPORT_CPU CpuSETTLE : public ReferenceConstraintAlgorithm {
public: public:
class ApplyToPositionsTask;
class ApplyToVelocitiesTask;
CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads); CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads);
~CpuSETTLE(); ~CpuSETTLE();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2014-2016 Stanford University and the Authors. * * Portions copyright (c) 2014-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -35,25 +35,6 @@ ...@@ -35,25 +35,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuBondForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuBondForce& owner, vector<Vec3>& atomCoordinates, double** parameters, vector<Vec3>& forces,
vector<double>& threadEnergy, double* totalEnergy, ReferenceBondIxn& referenceBondIxn) : owner(owner), atomCoordinates(atomCoordinates),
parameters(parameters), forces(forces), threadEnergy(threadEnergy), totalEnergy(totalEnergy), referenceBondIxn(referenceBondIxn) {
}
void execute(ThreadPool& threads, int threadIndex) {
double* energy = (totalEnergy == NULL ? NULL : &threadEnergy[threadIndex]);
owner.threadComputeForce(threads, threadIndex, atomCoordinates, parameters, forces, energy, referenceBondIxn);
}
CpuBondForce& owner;
vector<Vec3>& atomCoordinates;
double** parameters;
vector<Vec3>& forces;
vector<double>& threadEnergy;
double* totalEnergy;
ReferenceBondIxn& referenceBondIxn;
};
CpuBondForce::CpuBondForce() { CpuBondForce::CpuBondForce() {
} }
...@@ -188,8 +169,10 @@ void CpuBondForce::calculateForce(vector<Vec3>& atomCoordinates, double** parame ...@@ -188,8 +169,10 @@ void CpuBondForce::calculateForce(vector<Vec3>& atomCoordinates, double** parame
// Have the worker threads compute their forces. // Have the worker threads compute their forces.
vector<double> threadEnergy(threads->getNumThreads(), 0); vector<double> threadEnergy(threads->getNumThreads(), 0);
ComputeForceTask task(*this, atomCoordinates, parameters, forces, threadEnergy, totalEnergy, referenceBondIxn); threads->execute([&] (ThreadPool& threads, int threadIndex) {
threads->execute(task); double* energy = (totalEnergy == NULL ? NULL : &threadEnergy[threadIndex]);
threadComputeForce(threads, threadIndex, atomCoordinates, parameters, forces, energy, referenceBondIxn);
});
threads->waitForThreads(); threads->waitForThreads();
// Compute any "extra" bonds. // Compute any "extra" bonds.
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -33,16 +33,6 @@ ...@@ -33,16 +33,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomGBForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomGBForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomGBForce& owner;
};
CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threadIndex, CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threadIndex,
const vector<Lepton::CompiledExpression>& valueExpressions, const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions, const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
...@@ -206,7 +196,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, double** ato ...@@ -206,7 +196,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, double** ato
// Calculate the first computed value. // Calculate the first computed value.
ComputeForceTask task(*this); auto task = [&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); };
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.execute(task); threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
......
/* Portions copyright (c) 2009-2014 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -37,16 +37,6 @@ ...@@ -37,16 +37,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomManyParticleForce& owner;
};
CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) : CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) { threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
numParticles = force.getNumParticles(); numParticles = force.getNumParticles();
...@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double* ...@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double*
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the energies from all the threads. // Combine the energies from all the threads.
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios. /* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman * Contributors: Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -33,16 +33,6 @@ ...@@ -33,16 +33,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuCustomNonbondedForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomNonbondedForce& owner;
};
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) : const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) { energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
...@@ -150,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v ...@@ -150,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the energies from all the threads. // Combine the energies from all the threads.
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -36,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096; ...@@ -36,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f; const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f; const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuGBSAOBCForce& owner;
};
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) { CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS; logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX; logDXInv = 1.0f/logDX;
...@@ -110,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align ...@@ -110,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeTask task(*this);
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.execute(task); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads(); // Compute Born radii threads.waitForThreads(); // Compute Born radii
gmx_atomic_set(&counter, 0); gmx_atomic_set(&counter, 0);
threads.resumeThreads(); threads.resumeThreads();
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2016 Stanford University and the Authors. * * Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -44,17 +44,6 @@ ...@@ -44,17 +44,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
class CpuGayBerneForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGayBerneForce& owner, CpuNeighborList* neighborList) : owner(owner), neighborList(neighborList) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex, neighborList);
}
CpuGayBerneForce& owner;
CpuNeighborList* neighborList;
};
CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) { CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
// Record the force parameters. // Record the force parameters.
...@@ -137,8 +126,7 @@ double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vect ...@@ -137,8 +126,7 @@ double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vect
// Signal the threads to compute the pairwise interactions. // Signal the threads to compute the pairwise interactions.
ComputeTask task(*this, data.neighborList); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex, data.neighborList); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to compute exceptions. // Signal the threads to compute exceptions.
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#include "lepton/CustomFunction.h" #include "lepton/CustomFunction.h"
#include "lepton/Operation.h" #include "lepton/Operation.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
using namespace OpenMM; using namespace OpenMM;
...@@ -137,35 +138,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>& ...@@ -137,35 +138,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy; return 0.5*energy;
} }
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task { CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
public: CalcForcesAndEnergyKernel(name, platform), data(data) {
SumForceTask(int numParticles, vector<Vec3>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) { // Create a Reference platform version of this kernel.
}
void execute(ThreadPool& threads, int threadIndex) { ReferenceKernelFactory referenceFactory;
// Sum the contributions to forces that have been calculated by different threads. referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<Vec3>& forceData;
CpuPlatform::PlatformData& data;
};
class CpuCalcForcesAndEnergyKernel::InitForceTask : public ThreadPool::Task { void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
public: referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
InitForceTask(int numParticles, ContextImpl& context, CpuPlatform::PlatformData& data) : numParticles(numParticles), positionsValid(true), context(context), data(data) { lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
} }
void execute(ThreadPool& threads, int threadIndex) {
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
bool positionsValid = true;
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Convert the positions to single precision and apply periodic boundary conditions // Convert the positions to single precision and apply periodic boundary conditions
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
...@@ -218,36 +211,9 @@ public: ...@@ -218,36 +211,9 @@ public:
fvec4 zero(0.0f); fvec4 zero(0.0f);
for (int j = 0; j < numParticles; j++) for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[threadIndex][j*4]); zero.store(&data.threadForce[threadIndex][j*4]);
} });
int numParticles;
bool positionsValid;
ContextImpl& context;
CpuPlatform::PlatformData& data;
};
/**
 * Construct the CPU kernel and the Reference platform kernel it delegates to.
 *
 * @param name      the name of the kernel
 * @param platform  the Platform this kernel belongs to
 * @param data      the CPU platform data, stored in the data member
 * @param context   the context handed to the Reference kernel factory
 */
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context)
        : CalcForcesAndEnergyKernel(name, platform), data(data) {
    // Ask the Reference factory for its implementation of this kernel and wrap it in a Kernel handle.
    ReferenceKernelFactory factory;
    auto referenceImpl = factory.createKernelImpl(name, platform, context);
    referenceKernel = Kernel(referenceImpl);
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
data.threads.waitForThreads(); data.threads.waitForThreads();
if (!task.positionsValid) if (!positionsValid)
throw OpenMMException("Particle coordinate is nan"); throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list. // Determine whether we need to recompute the neighbor list.
...@@ -302,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i ...@@ -302,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) { double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
// Sum the forces from all the threads. // Sum the forces from all the threads.
SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data); data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
data.threads.execute(task); // Sum the contributions to forces that have been calculated by different threads.
int numParticles = context.getSystem().getNumParticles();
int numThreads = threads.getNumThreads();
// Each thread reduces its own contiguous particle range [start, end).
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
vector<Vec3>& forceData = extractForces(context);
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
// threadForce[j] is indexed with a stride of four floats per particle.
f += fvec4(&data.threadForce[j][4*i]);
// Only the first three lanes of the accumulated vector are added to the force.
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
});
data.threads.waitForThreads(); data.threads.waitForThreads();
// The return value comes from the Reference kernel's finishComputation().
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid); return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
} }
...@@ -528,7 +509,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4(); ...@@ -528,7 +509,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8(); CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) { data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported()) if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8(); nonbonded = createCpuNonbondedForceVec8();
else else
...@@ -575,12 +556,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -575,12 +556,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++) for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3]; bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles); particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0; double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge; data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -616,19 +599,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -616,19 +599,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
} }
else if (nonbondedMethod == PME) { else if (nonbondedMethod == PME) {
double alpha; double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]); NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha; ewaldAlpha = alpha;
} }
if (nonbondedMethod == Ewald || nonbondedMethod == PME) else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI); ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0; ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric(); rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection()) if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME); data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
} }
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
...@@ -646,6 +645,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -646,6 +645,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
} }
} }
if (nonbondedMethod == LJPME) {
    // If available, use the optimized PME implementation.  Both the electrostatic and the
    // dispersion reciprocal kernels are created below, so the platform has to provide both
    // of them before the optimized path is selected; checking only one would let
    // createKernel() fail for the other.
    vector<string> kernelNames;
    kernelNames.push_back(CalcPmeReciprocalForceKernel::Name());
    kernelNames.push_back(CalcDispersionPmeReciprocalForceKernel::Name());
    useOptimizedPme = getPlatform().supportsKernels(kernelNames);
    if (useOptimizedPme) {
        // Electrostatic reciprocal-space kernel.
        optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
        optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
        // Dispersion reciprocal-space kernel, with its own grid and separation parameter.
        optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
        optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
                dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
    }
}
} }
AlignedArray<float>& posq = data.posq; AlignedArray<float>& posq = data.posq;
vector<Vec3>& posData = extractPositions(context); vector<Vec3>& posData = extractPositions(context);
...@@ -654,6 +667,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -654,6 +667,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0); double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff) if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric); nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) { if (data.isPeriodic) {
...@@ -669,9 +683,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -669,9 +683,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize); nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction) if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance); nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0; double nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads); nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles); PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
...@@ -680,13 +698,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -680,13 +698,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
} }
else else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL); nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
} }
energy += nonbondedEnergy; energy += nonbondedEnergy;
if (includeDirect) { if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14; ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14); bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic) if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]); energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
} }
return energy; return energy;
...@@ -739,7 +757,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -739,7 +757,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
} }
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const { void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME) if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME"); throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme) if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz); optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
...@@ -751,6 +769,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ...@@ -751,6 +769,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
} }
} }
/**
 * Get the parameters being used for the dispersion term of LJPME.
 *
 * @param alpha   receives the dispersion separation parameter
 * @param nx      receives the number of grid points along the X axis
 * @param ny      receives the number of grid points along the Y axis
 * @param nz      receives the number of grid points along the Z axis
 * @throws OpenMMException if the nonbonded method is not LJPME
 */
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
    if (nonbondedMethod != LJPME)
        throw OpenMMException("getLJPMEParametersInContext: This Context is not using LJPME");
    if (useOptimizedPme) {
        // optimizedDispersionPme is created as a CalcDispersionPmeReciprocalForceKernel,
        // so it has to be accessed as that type rather than as the electrostatic kernel.
        optimizedDispersionPme.getAs<const CalcDispersionPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
    }
    else {
        // No optimized kernel: report the values this kernel computed for itself.
        alpha = ewaldDispersionAlpha;
        nx = dispersionGridSize[0];
        ny = dispersionGridSize[1];
        nz = dispersionGridSize[2];
    }
}
// Construct the kernel; forceCopy and nonbonded both start out as NULL and the body does no further work.
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) { CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
} }
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Authors: Peter Eastman * Authors: Peter Eastman
* Contributors: * Contributors:
* *
...@@ -29,36 +29,6 @@ ...@@ -29,36 +29,6 @@
using namespace OpenMM; using namespace OpenMM;
using namespace std; using namespace std;
/**
 * Thread pool task that forwards each worker thread to CpuLangevinDynamics::threadUpdate1().
 */
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
public:
    /** The integrator whose first update stage is run. */
    CpuLangevinDynamics& owner;

    Update1Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }

    /** Run the first update stage for the worker with the given index. */
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadUpdate1(workerIndex);
    }
};
/**
 * Thread pool task that forwards each worker thread to CpuLangevinDynamics::threadUpdate2().
 */
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
    /** The integrator whose second update stage is run. */
    CpuLangevinDynamics& owner;

    Update2Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }

    /** Run the second update stage for the worker with the given index. */
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadUpdate2(workerIndex);
    }
};
/**
 * Thread pool task that forwards each worker thread to CpuLangevinDynamics::threadUpdate3().
 */
class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
public:
    /** The integrator whose third update stage is run. */
    CpuLangevinDynamics& owner;

    Update3Task(CpuLangevinDynamics& dynamics) : owner(dynamics) {
    }

    /** Run the third update stage for the worker with the given index. */
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadUpdate3(workerIndex);
    }
};
// Pass the integrator settings on to ReferenceStochasticDynamics and store the supplied thread pool and random generator in the threads and random members.
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, double deltaT, double friction, double temperature, ThreadPool& threads, CpuRandom& random) : CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, double deltaT, double friction, double temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) { ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
} }
...@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<Vec3>& atomCoord ...@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<Vec3>& atomCoord
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update1Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
...@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<Vec3>& atomCoord ...@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<Vec3>& atomCoord
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update2Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
...@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<Vec3>& atomCoord ...@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<Vec3>& atomCoord
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
Update3Task task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. * * Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -409,16 +409,6 @@ private: ...@@ -409,16 +409,6 @@ private:
vector<vector<vector<pair<float, int> > > > bins; vector<vector<vector<pair<float, int> > > > bins;
}; };
/**
 * Thread pool task that forwards each worker thread to CpuNeighborList::threadComputeNeighborList().
 */
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public:
    /** The neighbor list being built. */
    CpuNeighborList& owner;

    ThreadTask(CpuNeighborList& neighborList) : owner(neighborList) {
    }

    /** Run this worker's share of the neighbor list computation. */
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadComputeNeighborList(pool, workerIndex);
    }
};
// Record the block size; nothing else is set up at construction time.
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) { CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
} }
...@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float ...@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve. // Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms); atomBins.resize(numAtoms);
ThreadTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
sort(atomBins.begin(), atomBins.end()); sort(atomBins.begin(), atomBins.end());
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group * Contributors: Pande Group
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "ReferencePME.h" #include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h" #include "openmm/internal/gmx_atomic.h"
#include <algorithm> #include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will // In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined. // make sure that erf() and erfc() are defined.
...@@ -41,23 +42,14 @@ using namespace OpenMM; ...@@ -41,23 +42,14 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M)); const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048; const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
/**
 * Thread pool task that forwards each worker thread to CpuNonbondedForce::threadComputeDirect().
 */
class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public:
    /** The force object whose direct-space interactions are computed. */
    CpuNonbondedForce& owner;

    ComputeDirectTask(CpuNonbondedForce& force) : owner(force) {
    }

    /** Run this worker's share of the direct-space computation. */
    void execute(ThreadPool& pool, int workerIndex) {
        owner.threadComputeDirect(pool, workerIndex);
    }
};
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor CpuNonbondedForce constructor
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
// Default state: every mode flag is false, the lookup-table validity flags are false, and the cutoff distance and Ewald separation parameters are zero.
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) { CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
} }
CpuNonbondedForce::~CpuNonbondedForce() { CpuNonbondedForce::~CpuNonbondedForce() {
...@@ -78,10 +70,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig ...@@ -78,10 +70,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false; tableIsValid = false;
cutoff = true; cutoff = true;
cutoffDistance = distance; cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors; neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0); krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0); crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
} if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -96,7 +99,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -96,7 +99,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance; switchingDistance = distance;
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff also been set, and the smallest side of the periodic box is at least twice the cutoff
...@@ -106,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -106,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) { void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(cutoff); assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance); assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
...@@ -124,11 +127,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -124,11 +127,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0); periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0); periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 || triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 || periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0); periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Ewald summation. Set the force to use Ewald summation.
...@@ -139,18 +142,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -139,18 +142,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) { void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
// A different alpha makes the cached table stale; clear the flag before storing the new value.
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
// The three kmax arguments are stored in numRx, numRy and numRz.
numRx = kmaxx; numRx = kmaxx;
numRy = kmaxy; numRy = kmaxy;
numRz = kmaxz; numRz = kmaxz;
ewald = true; ewald = true;
// Returns immediately when tableIsValid is still set.
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation. Set the force to use Particle-Mesh Ewald (PME) summation.
...@@ -159,19 +162,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -159,19 +162,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) { void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
// A different alpha makes the cached table stale; clear the flag before storing the new value.
if (alpha != alphaEwald) if (alpha != alphaEwald)
tableIsValid = false; tableIsValid = false;
alphaEwald = alpha; alphaEwald = alpha;
// Copy the three mesh dimensions into meshDim.
meshDim[0] = meshSize[0]; meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1]; meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2]; meshDim[2] = meshSize[2];
pme = true; pme = true;
// Returns immediately when tableIsValid is still set.
tabulateEwaldScaleFactor(); tabulateEwaldScaleFactor();
} }
void CpuNonbondedForce::tabulateEwaldScaleFactor() { /**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param meshSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
    // A different alpha makes the tabulated dispersion terms stale, so force a rebuild.
    if (alpha != alphaDispersionEwald)
        expTableIsValid = false;
    alphaDispersionEwald = alpha;
    dispersionMeshDim[0] = meshSize[0];
    dispersionMeshDim[1] = meshSize[1];
    dispersionMeshDim[2] = meshSize[2];
    ljpme = true;
    // Returns immediately when expTableIsValid is still set.
    tabulateExpTerms();
    if (cutoffDistance != 0.0f) {
        // We set this here, in case setUseLJPME is called after the cutoff is set
        double dalphaR = alphaDispersionEwald*cutoffDistance;
        double dar2 = dalphaR * dalphaR;
        double dar4 = dar2*dar2;
        double expterm = EXP(-dar2);
        inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
    }
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid) if (tableIsValid)
return; return;
tableIsValid = true; tableIsValid = true;
...@@ -187,10 +220,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) { ...@@ -187,10 +220,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR); ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
} }
} }
/**
 * Build the lookup tables of dispersion Ewald terms, unless they are already valid.
 *
 * The tables hold NUM_TABLE_POINTS+4 entries spaced exptermsDX apart, where
 * exptermsDX is the cutoff distance divided by NUM_TABLE_POINTS.
 */
void CpuNonbondedForce::tabulateExpTerms() {
    if (expTableIsValid)
        return;
    expTableIsValid = true;
    exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
    exptermsDXInv = 1.0f/exptermsDX;
    exptermsTable.resize(NUM_TABLE_POINTS+4);
    dExptermsTable.resize(NUM_TABLE_POINTS+4);
    for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
        // Sample at this table's own spacing.  The electrostatic table's spacing (ewaldDX) is
        // a separate member that this function never sets, so it must not be used here.
        double r = i*exptermsDX;
        double dalphaR = alphaDispersionEwald*r;
        double dar2 = dalphaR * dalphaR;
        double dar4 = dar2*dar2;
        double dar6 = dar4*dar2;
        double expterm = EXP(-dar2);
        exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
        dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
    }
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<Vec3>& forces, double* totalEnergy) const { vector<Vec3>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex; typedef std::complex<float> d_complex;
static const float epsilon = 1.0; static const float epsilon = 1.0;
...@@ -211,6 +264,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -211,6 +264,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (totalEnergy) if (totalEnergy)
*totalEnergy += recipEnergy; *totalEnergy += recipEnergy;
pme_destroy(pmedata); pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<Vec3> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = C6params[i];
dpmeforces.push_back(Vec3());
}
double recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
} }
// Ewald method // Ewald method
...@@ -224,7 +300,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -224,7 +300,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors // setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z] #define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3); vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms); vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms); vector<d_complex> tab_qxyz(numberOfAtoms);
...@@ -232,15 +308,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -232,15 +308,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
for (int i = 0; (i < numberOfAtoms); i++) { for (int i = 0; (i < numberOfAtoms); i++) {
float* pos = posq+4*i; float* pos = posq+4*i;
for (int m = 0; (m < 3); m++) for (int m = 0; (m < 3); m++)
EIR(0, i, m) = d_complex(1,0); EIR(0, i, m) = d_complex(1,0);
for (int m=0; (m<3); m++) for (int m=0; (m<3); m++)
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]), EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m])); sin(pos[m]*recipBoxSize[m]));
for (int j=2; (j<kmax); j++) for (int j=2; (j<kmax); j++)
for (int m=0; (m<3); m++) for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m); EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
} }
// calculate reciprocal space energy and forces // calculate reciprocal space energy and forces
...@@ -254,11 +330,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -254,11 +330,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
float ky = ry * recipBoxSize[1]; float ky = ry * recipBoxSize[1];
if (ry >= 0) { if (ry >= 0) {
for (int n = 0; n < numberOfAtoms; n++) for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1); tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
} }
else { else {
for (int n = 0; n < numberOfAtoms; n++) for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1)); tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
} }
for (int rz = lowrz; rz < numRz; rz++) { for (int rz = lowrz; rz < numRz; rz++) {
if (rz >= 0) { if (rz >= 0) {
...@@ -301,13 +377,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -301,13 +377,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, const vector<pair<float, float> >& atomParameters, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) { const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
this->posq = posq; this->posq = posq;
this->atomCoordinates = &atomCoordinates[0]; this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0]; this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0]; this->exclusions = &exclusions[0];
this->threadForce = &threadForce; this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
...@@ -318,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -318,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this); threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Signal the threads to subtract the exclusions. // Signal the threads to subtract the exclusions.
...@@ -350,9 +426,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -350,9 +426,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0]; float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0); fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0); fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) { if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list. // Compute the interactions from the neighbor list.
while (true) { while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1); int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks()) if (nextBlock >= neighborList->getNumBlocks())
...@@ -370,7 +445,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -370,7 +445,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
break; break;
int end = min(start+groupSize, numberOfAtoms); int end = min(start+groupSize, numberOfAtoms);
for (int i = start; i < end; i++) { for (int i = start; i < end; i++) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f); fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]); float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) { for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) { if (*iter > i) {
...@@ -394,7 +469,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -394,7 +469,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR; threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
} }
else if (includeEnergy) else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3]; threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
} }
} }
} }
...@@ -444,7 +530,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -444,7 +530,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
} }
float sig = atomParameters[ii].first + atomParameters[jj].first; float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig; float sig2 = inverseR*sig;
sig2 *= sig2; sig2 *= sig2;
float sig6 = sig2*sig2*sig2; float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second; float eps = atomParameters[ii].second*atomParameters[jj].second;
...@@ -476,7 +562,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t ...@@ -476,7 +562,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR; fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii); (fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj); (fvec4(forces+4*jj)-result).store(forces+4*jj);
} }
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const { void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI; deltaR = posJ-posI;
...@@ -502,3 +588,18 @@ float CpuNonbondedForce::erfcApprox(float x) { ...@@ -502,3 +588,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1]; return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
} }
/**
 * Look up a value in exptermsTable by linear interpolation.
 *
 * @param x  the position at which to evaluate the table
 * @return the weighted combination of the two neighboring table entries
 */
float CpuNonbondedForce::exptermsApprox(float x) {
    // Position of x measured in table intervals.
    float tablePos = x*exptermsDXInv;
    // Lower sample, capped at NUM_TABLE_POINTS.
    int lower = (int) floor(tablePos);
    if (lower > NUM_TABLE_POINTS)
        lower = NUM_TABLE_POINTS;
    float upperWeight = tablePos-lower;
    float lowerWeight = 1.0f-upperWeight;
    return lowerWeight*exptermsTable[lower] + upperWeight*exptermsTable[lower+1];
}
/**
 * Look up a value in dExptermsTable by linear interpolation.
 *
 * @param x  the position at which to evaluate the table
 * @return the weighted combination of the two neighboring table entries
 */
float CpuNonbondedForce::dExptermsApprox(float x) {
    // Position of x measured in table intervals.
    float tablePos = x*exptermsDXInv;
    // Lower sample, capped at NUM_TABLE_POINTS.
    int lower = (int) floor(tablePos);
    if (lower > NUM_TABLE_POINTS)
        lower = NUM_TABLE_POINTS;
    float upperWeight = tablePos-lower;
    float lowerWeight = 1.0f-upperWeight;
    return lowerWeight*dExptermsTable[lower] + upperWeight*dExptermsTable[lower+1];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment