Commit cd874b2b authored by peastman's avatar peastman
Browse files

Merged changes from main branch

parents a783b996 b84e22ba
/* Portions copyright (c) 2009-2016 Stanford University and Simbios.
/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -122,7 +122,6 @@ class CpuCustomNonbondedForce {
double* fixedParameters, const std::map<std::string, double>& globalParameters,
std::vector<AlignedArray<float> >& threadForce, bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
private:
class ComputeForceTask;
class ThreadData;
bool cutoff;
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -36,7 +36,6 @@ namespace OpenMM {
class CpuGBSAOBCForce {
public:
class ComputeTask;
CpuGBSAOBCForce();
/**
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,7 +45,6 @@ namespace OpenMM {
class CpuGayBerneForce {
public:
struct Matrix;
class ComputeTask;
/**
* Constructor.
......
......@@ -54,8 +54,6 @@ namespace OpenMM {
*/
class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public:
class InitForceTask;
class SumForceTask;
CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context);
/**
* Initialize the kernel.
......@@ -251,27 +249,37 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
/**
* Get the parameters being used for PME.
*
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class PmeIO;
CpuPlatform::PlatformData& data;
int numParticles, num14;
int **bonded14IndexArray;
double **bonded14ParamArray;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme;
double nonbondedCutoff, switchingDistance, rfDielectric, ewaldAlpha, ewaldDispersionAlpha, ewaldSelfEnergy, dispersionCoefficient;
int kmax[3], gridSize[3], dispersionGridSize[3];
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme, hasInitializedDispersionPme;
std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams;
std::vector<float> C6params;
NonbondedMethod nonbondedMethod;
CpuNonbondedForce* nonbonded;
Kernel optimizedPme;
Kernel optimizedPme, optimizedDispersionPme;
CpuBondForce bondForce;
};
......
/* Portions copyright (c) 2013-2016 Stanford University and Simbios.
/* Portions copyright (c) 2013-2017 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
......@@ -35,9 +35,6 @@ namespace OpenMM {
class CpuLangevinDynamics : public ReferenceStochasticDynamics {
public:
class Update1Task;
class Update2Task;
class Update3Task;
/**
* Constructor.
*
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,7 +45,6 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuNeighborList {
public:
class ThreadTask;
class Voxels;
CpuNeighborList(int blockSize);
void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -39,7 +39,6 @@ namespace OpenMM {
class CpuNonbondedForce {
public:
class ComputeDirectTask;
/**---------------------------------------------------------------------------------------
......@@ -104,16 +103,27 @@ class CpuNonbondedForce {
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void setUsePME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void setUseLJPME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Calculate Ewald ixn
......@@ -122,16 +132,17 @@ class CpuNonbondedForce {
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param C6Paramrs C6 parameters for multiplicative representation of dispersion
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<Vec3>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<Vec3>& forces, double* totalEnergy) const;
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<float> &C6params,
const std::vector<std::set<int> >& exclusions, std::vector<Vec3>& forces, double* totalEnergy) const;
/**---------------------------------------------------------------------------------------
......@@ -150,7 +161,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<Vec3>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
const std::vector<float>& C6params, const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/**
* This routine contains the code executed by each thread.
......@@ -163,28 +174,32 @@ protected:
bool periodic;
bool triclinic;
bool ewald;
bool pme;
bool tableIsValid;
bool ljpme, pme;
bool tableIsValid, expTableIsValid;
const CpuNeighborList* neighborList;
float recipBoxSize[3];
Vec3 periodicBoxVectors[3];
AlignedArray<fvec4> periodicBoxVec4;
float cutoffDistance, switchingDistance;
float krf, crf;
float alphaEwald;
float alphaEwald, alphaDispersionEwald;
int numRx, numRy, numRz;
int meshDim[3];
int meshDim[3], dispersionMeshDim[3];
std::vector<float> erfcTable, ewaldScaleTable;
float ewaldDX, ewaldDXInv, erfcDXInv;
std::vector<float> exptermsTable, dExptermsTable;
float ewaldDX, ewaldDXInv, erfcDXInv, exptermsDX, exptermsDXInv;
std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads.
int numberOfAtoms;
float* posq;
Vec3 const* atomCoordinates;
std::pair<float, float> const* atomParameters;
float const *C6params;
std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy;
float inverseRcut6;
float inverseRcut6Expterm;
void* atomicCounter;
static const float TWO_OVER_SQRT_PI;
......@@ -238,10 +253,29 @@ protected:
*/
void tabulateEwaldScaleFactor();
/**
* Create a lookup table for the scale factor used with dispersion PME.
*/
void tabulateExpTerms();
/**
* Compute a fast approximation to erfc(x).
*/
float erfcApprox(float x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
float exptermsApprox(float R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
float dExptermsApprox(float R);
};
} // namespace OpenMM
......
......@@ -88,11 +88,25 @@ protected:
* Compute a fast approximation to erfc(x).
*/
fvec4 erfcApprox(const fvec4& x);
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec4 ewaldScaleFunction(const fvec4& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec4 exptermsApprox(const fvec4& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec4 dExptermsApprox(const fvec4& R);
};
} // namespace OpenMM
......
......@@ -92,6 +92,21 @@ protected:
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec8 ewaldScaleFunction(const fvec8& x);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4))
* where dar = (dispersionAlpha * R)
* needed for LJPME energies.
*/
fvec8 exptermsApprox(const fvec8& R);
/**
* Compute a fast approximation to (1.0 - EXP(-dar^2) * (1.0 + dar^2 + 0.5*dar^4 + dar^6/6.0))
* where dar = (dispersionAlpha * R)
* needed for LJPME forces.
*/
fvec8 dExptermsApprox(const fvec8& R);
};
} // namespace OpenMM
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -45,8 +45,6 @@ namespace OpenMM {
*/
class OPENMM_EXPORT_CPU CpuSETTLE : public ReferenceConstraintAlgorithm {
public:
class ApplyToPositionsTask;
class ApplyToVelocitiesTask;
CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads);
~CpuSETTLE();
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014-2016 Stanford University and the Authors. *
* Portions copyright (c) 2014-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,25 +35,6 @@
using namespace OpenMM;
using namespace std;
class CpuBondForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuBondForce& owner, vector<Vec3>& atomCoordinates, double** parameters, vector<Vec3>& forces,
vector<double>& threadEnergy, double* totalEnergy, ReferenceBondIxn& referenceBondIxn) : owner(owner), atomCoordinates(atomCoordinates),
parameters(parameters), forces(forces), threadEnergy(threadEnergy), totalEnergy(totalEnergy), referenceBondIxn(referenceBondIxn) {
}
void execute(ThreadPool& threads, int threadIndex) {
double* energy = (totalEnergy == NULL ? NULL : &threadEnergy[threadIndex]);
owner.threadComputeForce(threads, threadIndex, atomCoordinates, parameters, forces, energy, referenceBondIxn);
}
CpuBondForce& owner;
vector<Vec3>& atomCoordinates;
double** parameters;
vector<Vec3>& forces;
vector<double>& threadEnergy;
double* totalEnergy;
ReferenceBondIxn& referenceBondIxn;
};
CpuBondForce::CpuBondForce() {
}
......@@ -188,8 +169,10 @@ void CpuBondForce::calculateForce(vector<Vec3>& atomCoordinates, double** parame
// Have the worker threads compute their forces.
vector<double> threadEnergy(threads->getNumThreads(), 0);
ComputeForceTask task(*this, atomCoordinates, parameters, forces, threadEnergy, totalEnergy, referenceBondIxn);
threads->execute(task);
threads->execute([&] (ThreadPool& threads, int threadIndex) {
double* energy = (totalEnergy == NULL ? NULL : &threadEnergy[threadIndex]);
threadComputeForce(threads, threadIndex, atomCoordinates, parameters, forces, energy, referenceBondIxn);
});
threads->waitForThreads();
// Compute any "extra" bonds.
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios.
/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -33,16 +33,6 @@
using namespace OpenMM;
using namespace std;
class CpuCustomGBForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomGBForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomGBForce& owner;
};
CpuCustomGBForce::ThreadData::ThreadData(int numAtoms, int numThreads, int threadIndex,
const vector<Lepton::CompiledExpression>& valueExpressions,
const vector<vector<Lepton::CompiledExpression> >& valueDerivExpressions,
......@@ -206,7 +196,7 @@ void CpuCustomGBForce::calculateIxn(int numberOfAtoms, float* posq, double** ato
// Calculate the first computed value.
ComputeForceTask task(*this);
auto task = [&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); };
gmx_atomic_set(&counter, 0);
threads.execute(task);
threads.waitForThreads();
......
/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -37,16 +37,6 @@
using namespace OpenMM;
using namespace std;
class CpuCustomManyParticleForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomManyParticleForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomManyParticleForce& owner;
};
CpuCustomManyParticleForce::CpuCustomManyParticleForce(const CustomManyParticleForce& force, ThreadPool& threads) :
threads(threads), useCutoff(false), usePeriodic(false), neighborList(NULL) {
numParticles = force.getNumParticles();
......@@ -141,8 +131,7 @@ void CpuCustomManyParticleForce::calculateIxn(AlignedArray<float>& posq, double*
// Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads();
// Combine the energies from all the threads.
......
/* Portions copyright (c) 2009-2016 Stanford University and Simbios.
/* Portions copyright (c) 2009-2017 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -33,16 +33,6 @@
using namespace OpenMM;
using namespace std;
class CpuCustomNonbondedForce::ComputeForceTask : public ThreadPool::Task {
public:
ComputeForceTask(CpuCustomNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuCustomNonbondedForce& owner;
};
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions) {
......@@ -150,8 +140,7 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
// Signal the threads to start running and wait for them to finish.
ComputeForceTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads();
// Combine the energies from all the threads.
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -36,16 +36,6 @@ const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 4096;
const float CpuGBSAOBCForce::TABLE_MIN = 0.25f;
const float CpuGBSAOBCForce::TABLE_MAX = 1.5f;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuGBSAOBCForce& owner;
};
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = (TABLE_MAX-TABLE_MIN)/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX;
......@@ -110,9 +100,8 @@ void CpuGBSAOBCForce::computeForce(const AlignedArray<float>& posq, vector<Align
// Signal the threads to start running and wait for them to finish.
ComputeTask task(*this);
gmx_atomic_set(&counter, 0);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex); });
threads.waitForThreads(); // Compute Born radii
gmx_atomic_set(&counter, 0);
threads.resumeThreads();
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2016 Stanford University and the Authors. *
* Portions copyright (c) 2016-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -44,17 +44,6 @@
using namespace OpenMM;
using namespace std;
class CpuGayBerneForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGayBerneForce& owner, CpuNeighborList* neighborList) : owner(owner), neighborList(neighborList) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex, neighborList);
}
CpuGayBerneForce& owner;
CpuNeighborList* neighborList;
};
CpuGayBerneForce::CpuGayBerneForce(const GayBerneForce& force) {
// Record the force parameters.
......@@ -137,8 +126,7 @@ double CpuGayBerneForce::calculateForce(const vector<Vec3>& positions, std::vect
// Signal the threads to compute the pairwise interactions.
ComputeTask task(*this, data.neighborList);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeForce(threads, threadIndex, data.neighborList); });
threads.waitForThreads();
// Signal the threads to compute exceptions.
......
......@@ -50,6 +50,7 @@
#include "lepton/CustomFunction.h"
#include "lepton/Operation.h"
#include "lepton/Parser.h"
#include <iostream>
#include "lepton/ParsedExpression.h"
using namespace OpenMM;
......@@ -137,35 +138,27 @@ static double computeShiftedKineticEnergy(ContextImpl& context, vector<double>&
return 0.5*energy;
}
class CpuCalcForcesAndEnergyKernel::SumForceTask : public ThreadPool::Task {
public:
SumForceTask(int numParticles, vector<Vec3>& forceData, CpuPlatform::PlatformData& data) : numParticles(numParticles), forceData(forceData), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
// Sum the contributions to forces that have been calculated by different threads.
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
}
int numParticles;
vector<Vec3>& forceData;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
class CpuCalcForcesAndEnergyKernel::InitForceTask : public ThreadPool::Task {
public:
InitForceTask(int numParticles, ContextImpl& context, CpuPlatform::PlatformData& data) : numParticles(numParticles), positionsValid(true), context(context), data(data) {
}
void execute(ThreadPool& threads, int threadIndex) {
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
bool positionsValid = true;
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Convert the positions to single precision and apply periodic boundary conditions
AlignedArray<float>& posq = data.posq;
......@@ -218,36 +211,9 @@ public:
fvec4 zero(0.0f);
for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[threadIndex][j*4]);
}
int numParticles;
bool positionsValid;
ContextImpl& context;
CpuPlatform::PlatformData& data;
};
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert positions to single precision and clear the forces.
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
});
data.threads.waitForThreads();
if (!task.positionsValid)
if (!positionsValid)
throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list.
......@@ -302,8 +268,23 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
// Sum the forces from all the threads.
SumForceTask task(context.getSystem().getNumParticles(), extractForces(context), data);
data.threads.execute(task);
data.threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Sum the contributions to forces that have been calculated by different threads.
int numParticles = context.getSystem().getNumParticles();
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
vector<Vec3>& forceData = extractForces(context);
for (int i = start; i < end; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
});
data.threads.waitForThreads();
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups, valid);
}
......@@ -528,7 +509,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) {
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), hasInitializedDispersionPme(false), nonbonded(NULL) {
if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8();
else
......@@ -575,12 +556,14 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < num14; i++)
bonded14ParamArray[i] = new double[3];
particleParams.resize(numParticles);
C6params.resize(numParticles);
double sumSquaredCharges = 0.0;
for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth);
data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
C6params[i] = 8.0*pow(particleParams[i].first, 3.0) * particleParams[i].second;
sumSquaredCharges += charge*charge;
}
......@@ -616,19 +599,35 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
}
else if (nonbondedMethod == PME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2]);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
else if (nonbondedMethod == LJPME) {
double alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSize[0], gridSize[1], gridSize[2], false);
ewaldAlpha = alpha;
NonbondedForceImpl::calcPMEParameters(system, force, alpha, dispersionGridSize[0], dispersionGridSize[1], dispersionGridSize[2], true);
ewaldDispersionAlpha = alpha;
useSwitchingFunction = false;
}
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME) {
ewaldSelfEnergy = -ONE_4PI_EPS0*ewaldAlpha*sumSquaredCharges/sqrt(M_PI);
else
if(nonbondedMethod == LJPME){
for (int atom = 0; atom < numParticles; atom++) {
// Dispersion self term
ewaldSelfEnergy += pow(ewaldDispersionAlpha, 6.0) * C6params[atom]*C6params[atom] / 12.0;
}
}
} else {
ewaldSelfEnergy = 0.0;
}
rfDielectric = force.getReactionFieldDielectric();
if (force.getUseDispersionCorrection())
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME);
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME);
}
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
......@@ -646,6 +645,20 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
}
}
if (nonbondedMethod == LJPME) {
// If available, use the optimized PME implementation.
vector<string> kernelNames;
kernelNames.push_back("CalcPmeReciprocalForce");
useOptimizedPme = getPlatform().supportsKernels(kernelNames);
if (useOptimizedPme) {
optimizedPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), context);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSize[0], gridSize[1], gridSize[2], numParticles, ewaldAlpha);
optimizedDispersionPme = getPlatform().createKernel(CalcDispersionPmeReciprocalForceKernel::Name(), context);
optimizedDispersionPme.getAs<CalcDispersionPmeReciprocalForceKernel>().initialize(dispersionGridSize[0], dispersionGridSize[1],
dispersionGridSize[2], numParticles, ewaldDispersionAlpha);
}
}
}
AlignedArray<float>& posq = data.posq;
vector<Vec3>& posData = extractPositions(context);
......@@ -654,6 +667,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
bool ljpme = (nonbondedMethod == LJPME);
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) {
......@@ -669,9 +683,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded->setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction)
nonbonded->setUseSwitchingFunction(switchingDistance);
if (ljpme){
nonbonded->setUsePME(ewaldAlpha, gridSize);
nonbonded->setUseLJPME(ewaldDispersionAlpha, dispersionGridSize);
}
double nonbondedEnergy = 0;
if (includeDirect)
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
nonbonded->calculateDirectIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) {
if (useOptimizedPme) {
PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
......@@ -680,13 +698,13 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbondedEnergy += optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
}
else
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
nonbonded->calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, C6params, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
}
energy += nonbondedEnergy;
if (includeDirect) {
ReferenceLJCoulomb14 nonbonded14;
bondForce.calculateForce(posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (data.isPeriodic)
if (data.isPeriodic && nonbondedMethod != LJPME)
energy += dispersionCoefficient/(boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2]);
}
return energy;
......@@ -739,7 +757,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
}
void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != PME)
if (nonbondedMethod != PME && nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
......@@ -751,6 +769,19 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
}
void CpuCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (nonbondedMethod != LJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (useOptimizedPme)
optimizedDispersionPme.getAs<const CalcPmeReciprocalForceKernel>().getPMEParameters(alpha, nx, ny, nz);
else {
alpha = ewaldDispersionAlpha;
nx = dispersionGridSize[0];
ny = dispersionGridSize[1];
nz = dispersionGridSize[2];
}
}
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
}
......
/* Portions copyright (c) 2006-2016 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Authors: Peter Eastman
* Contributors:
*
......@@ -29,36 +29,6 @@
using namespace OpenMM;
using namespace std;
class CpuLangevinDynamics::Update1Task : public ThreadPool::Task {
public:
Update1Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate1(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update2Task : public ThreadPool::Task {
public:
Update2Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate2(threadIndex);
}
CpuLangevinDynamics& owner;
};
class CpuLangevinDynamics::Update3Task : public ThreadPool::Task {
public:
Update3Task(CpuLangevinDynamics& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadUpdate3(threadIndex);
}
CpuLangevinDynamics& owner;
};
CpuLangevinDynamics::CpuLangevinDynamics(int numberOfAtoms, double deltaT, double friction, double temperature, ThreadPool& threads, CpuRandom& random) :
ReferenceStochasticDynamics(numberOfAtoms, deltaT, friction, temperature), threads(threads), random(random) {
}
......@@ -79,8 +49,7 @@ void CpuLangevinDynamics::updatePart1(int numberOfAtoms, vector<Vec3>& atomCoord
// Signal the threads to start running and wait for them to finish.
Update1Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate1(threadIndex); });
threads.waitForThreads();
}
......@@ -97,8 +66,7 @@ void CpuLangevinDynamics::updatePart2(int numberOfAtoms, vector<Vec3>& atomCoord
// Signal the threads to start running and wait for them to finish.
Update2Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate2(threadIndex); });
threads.waitForThreads();
}
......@@ -114,8 +82,7 @@ void CpuLangevinDynamics::updatePart3(int numberOfAtoms, vector<Vec3>& atomCoord
// Signal the threads to start running and wait for them to finish.
Update3Task task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadUpdate3(threadIndex); });
threads.waitForThreads();
}
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2016 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -409,16 +409,6 @@ private:
vector<vector<vector<pair<float, int> > > > bins;
};
class CpuNeighborList::ThreadTask : public ThreadPool::Task {
public:
ThreadTask(CpuNeighborList& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeNeighborList(threads, threadIndex);
}
CpuNeighborList& owner;
};
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
}
......@@ -460,8 +450,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Sort the atoms based on a Hilbert curve.
atomBins.resize(numAtoms);
ThreadTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeNeighborList(threads, threadIndex); });
threads.waitForThreads();
sort(atomBins.begin(), atomBins.end());
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
/* Portions copyright (c) 2006-2017 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -30,6 +30,7 @@
#include "ReferencePME.h"
#include "openmm/internal/gmx_atomic.h"
#include <algorithm>
#include <iostream>
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
......@@ -41,23 +42,14 @@ using namespace OpenMM;
const float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
const int CpuNonbondedForce::NUM_TABLE_POINTS = 2048;
class CpuNonbondedForce::ComputeDirectTask : public ThreadPool::Task {
public:
ComputeDirectTask(CpuNonbondedForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeDirect(threads, threadIndex);
}
CpuNonbondedForce& owner;
};
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), tableIsValid(false), cutoffDistance(0.0f), alphaEwald(0.0f) {
CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), periodic(false), ewald(false), pme(false), ljpme(false), tableIsValid(false), expTableIsValid(false),
cutoffDistance(0.0f), alphaDispersionEwald(0.0f), alphaEwald(0.0f) {
}
CpuNonbondedForce::~CpuNonbondedForce() {
......@@ -78,10 +70,21 @@ void CpuNonbondedForce::setUseCutoff(float distance, const CpuNeighborList& neig
tableIsValid = false;
cutoff = true;
cutoffDistance = distance;
inverseRcut6 = pow(cutoffDistance, -6);
neighborList = &neighbors;
krf = pow(cutoffDistance, -3.0f)*(solventDielectric-1.0)/(2.0*solventDielectric+1.0);
crf = (1.0/cutoffDistance)*(3.0*solventDielectric)/(2.0*solventDielectric+1.0);
}
if(alphaDispersionEwald != 0.0f){
// We set this here, in case setUseCutoff is called after the dispersion alpha is set.
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
/**---------------------------------------------------------------------------------------
......@@ -96,7 +99,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
switchingDistance = distance;
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
also been set, and the smallest side of the periodic box is at least twice the cutoff
......@@ -106,7 +109,7 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
void CpuNonbondedForce::setPeriodic(Vec3* periodicBoxVectors) {
assert(cutoff);
assert(periodicBoxVectors[0][0] >= 2.0*cutoffDistance);
......@@ -124,11 +127,11 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
periodicBoxVec4[1] = fvec4(periodicBoxVectors[1][0], periodicBoxVectors[1][1], periodicBoxVectors[1][2], 0);
periodicBoxVec4[2] = fvec4(periodicBoxVectors[2][0], periodicBoxVectors[2][1], periodicBoxVectors[2][2], 0);
triclinic = (periodicBoxVectors[0][1] != 0.0 || periodicBoxVectors[0][2] != 0.0 ||
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
}
periodicBoxVectors[1][0] != 0.0 || periodicBoxVectors[1][2] != 0.0 ||
periodicBoxVectors[2][0] != 0.0 || periodicBoxVectors[2][1] != 0.0);
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
......@@ -139,18 +142,18 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
numRx = kmaxx;
numRy = kmaxy;
numRz = kmaxz;
ewald = true;
tabulateEwaldScaleFactor();
}
void CpuNonbondedForce::setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
numRx = kmaxx;
numRy = kmaxy;
numRz = kmaxz;
ewald = true;
tabulateEwaldScaleFactor();
}
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
......@@ -159,19 +162,49 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2];
pme = true;
tabulateEwaldScaleFactor();
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
void CpuNonbondedForce::setUsePME(float alpha, int meshSize[3]) {
if (alpha != alphaEwald)
tableIsValid = false;
alphaEwald = alpha;
meshDim[0] = meshSize[0];
meshDim[1] = meshSize[1];
meshDim[2] = meshSize[2];
pme = true;
tabulateEwaldScaleFactor();
}
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation for dispersion.
@param alpha the Ewald separation parameter
@param gridSize the dimensions of the mesh
--------------------------------------------------------------------------------------- */
void CpuNonbondedForce::setUseLJPME(float alpha, int meshSize[3]) {
if (alpha != alphaDispersionEwald)
expTableIsValid = false;
alphaDispersionEwald = alpha;
dispersionMeshDim[0] = meshSize[0];
dispersionMeshDim[1] = meshSize[1];
dispersionMeshDim[2] = meshSize[2];
ljpme = true;
tabulateExpTerms();
if(cutoffDistance != 0.0f){
// We set this here, in case setUseLJPME is called after the cutoff is set
double dalphaR = alphaDispersionEwald*cutoffDistance;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
inverseRcut6Expterm = inverseRcut6*(1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
}
}
void CpuNonbondedForce::tabulateEwaldScaleFactor() {
if (tableIsValid)
return;
tableIsValid = true;
......@@ -187,10 +220,30 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
ewaldScaleTable[i] = erfcTable[i] + TWO_OVER_SQRT_PI*alphaR*exp(-alphaR*alphaR);
}
}
void CpuNonbondedForce::tabulateExpTerms() {
if (expTableIsValid)
return;
expTableIsValid = true;
exptermsDX = cutoffDistance/NUM_TABLE_POINTS;
exptermsDXInv = 1.0f/exptermsDX;
exptermsTable.resize(NUM_TABLE_POINTS+4);
dExptermsTable.resize(NUM_TABLE_POINTS+4);
for (int i = 0; i < NUM_TABLE_POINTS+4; i++) {
double r = i*ewaldDX;
double dalphaR = alphaDispersionEwald*r;
double dar2 = dalphaR * dalphaR;
double dar4 = dar2*dar2;
double dar6 = dar4*dar2;
double expterm = EXP(-dar2);
exptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4));
dExptermsTable[i] = (1.0 - expterm * (1.0 + dar2 + 0.5*dar4 + dar6/6.0));
}
}
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
vector<Vec3>& forces, double* totalEnergy) const {
const vector<pair<float, float> >& atomParameters, const vector<float> &C6params, const vector<set<int> >& exclusions,
vector<Vec3>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex;
static const float epsilon = 1.0;
......@@ -211,6 +264,29 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
if (totalEnergy)
*totalEnergy += recipEnergy;
pme_destroy(pmedata);
if (ljpme) {
// Dispersion reciprocal space terms
pme_init(&pmedata,alphaDispersionEwald,numberOfAtoms,dispersionMeshDim,5,1);
std::vector<Vec3> dpmeforces;
for (int i = 0; i < numberOfAtoms; i++){
charges[i] = C6params[i];
dpmeforces.push_back(Vec3());
}
double recipDispersionEnergy = 0.0;
pme_exec_dpme(pmedata,atomCoordinates,dpmeforces,charges,periodicBoxVectors,&recipDispersionEnergy);
for (int i = 0; i < numberOfAtoms; i++){
forces[i][0] -= 2.0*dpmeforces[i][0];
forces[i][1] -= 2.0*dpmeforces[i][1];
forces[i][2] -= 2.0*dpmeforces[i][2];
}
if (totalEnergy)
*totalEnergy += recipDispersionEnergy;
pme_destroy(pmedata);
}
}
// Ewald method
......@@ -224,7 +300,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup K-vectors
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
#define EIR(x, y, z) eir[(x)*numberOfAtoms*3+(y)*3+z]
vector<d_complex> eir(kmax*numberOfAtoms*3);
vector<d_complex> tab_xy(numberOfAtoms);
vector<d_complex> tab_qxyz(numberOfAtoms);
......@@ -232,15 +308,15 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
for (int i = 0; (i < numberOfAtoms); i++) {
float* pos = posq+4*i;
for (int m = 0; (m < 3); m++)
EIR(0, i, m) = d_complex(1,0);
EIR(0, i, m) = d_complex(1,0);
for (int m=0; (m<3); m++)
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m]));
EIR(1, i, m) = d_complex(cos(pos[m]*recipBoxSize[m]),
sin(pos[m]*recipBoxSize[m]));
for (int j=2; (j<kmax); j++)
for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
for (int m=0; (m<3); m++)
EIR(j, i, m) = EIR(j-1, i, m) * EIR(1, i, m);
}
// calculate reciprocal space energy and forces
......@@ -254,11 +330,11 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
float ky = ry * recipBoxSize[1];
if (ry >= 0) {
for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
tab_xy[n] = EIR(rx, n, 0) * EIR(ry, n, 1);
}
else {
for (int n = 0; n < numberOfAtoms; n++)
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
tab_xy[n]= EIR(rx, n, 0) * conj (EIR(-ry, n, 1));
}
for (int rz = lowrz; rz < numRz; rz++) {
if (rz >= 0) {
......@@ -301,13 +377,14 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<Vec3>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
const vector<float>& C6params, const vector<set<int> >& exclusions, vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms;
this->posq = posq;
this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0];
this->C6params = &C6params[0];
this->exclusions = &exclusions[0];
this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL);
......@@ -318,8 +395,7 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
// Signal the threads to start running and wait for them to finish.
ComputeDirectTask task(*this);
threads.execute(task);
threads.execute([&] (ThreadPool& threads, int threadIndex) { threadComputeDirect(threads, threadIndex); });
threads.waitForThreads();
// Signal the threads to subtract the exclusions.
......@@ -350,9 +426,8 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float* forces = &(*threadForce)[threadIndex][0];
fvec4 boxSize(periodicBoxVectors[0][0], periodicBoxVectors[1][1], periodicBoxVectors[2][2], 0);
fvec4 invBoxSize(recipBoxSize[0], recipBoxSize[1], recipBoxSize[2], 0);
if (ewald || pme) {
if (ewald || pme || ljpme) {
// Compute the interactions from the neighbor list.
while (true) {
int nextBlock = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (nextBlock >= neighborList->getNumBlocks())
......@@ -370,7 +445,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
break;
int end = min(start+groupSize, numberOfAtoms);
for (int i = start; i < end; i++) {
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
fvec4 posI((float) atomCoordinates[i][0], (float) atomCoordinates[i][1], (float) atomCoordinates[i][2], 0.0f);
float scaledChargeI = (float) (ONE_4PI_EPS0*posq[4*i+3]);
for (set<int>::const_iterator iter = exclusions[i].begin(); iter != exclusions[i].end(); ++iter) {
if (*iter > i) {
......@@ -394,7 +469,18 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
threadEnergy[threadIndex] -= chargeProdOverR*erfAlphaR;
}
else if (includeEnergy)
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
threadEnergy[threadIndex] -= alphaEwald*TWO_OVER_SQRT_PI*scaledChargeI*posq[4*j+3];
if (ljpme) {
float C6ij = C6params[i]*C6params[j];
float inverseR2 = 1.0f/r2;
float emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
if(includeEnergy)
threadEnergy[threadIndex] += emult;
float dEdR = -6.0f*C6ij*inverseR2*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*i)-result).store(forces+4*i);
(fvec4(forces+4*j)+result).store(forces+4*j);
}
}
}
}
......@@ -444,7 +530,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
}
float sig = atomParameters[ii].first + atomParameters[jj].first;
float sig2 = inverseR*sig;
sig2 *= sig2;
sig2 *= sig2;
float sig6 = sig2*sig2*sig2;
float eps = atomParameters[ii].second*atomParameters[jj].second;
......@@ -476,7 +562,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
fvec4 result = deltaR*dEdR;
(fvec4(forces+4*ii)+result).store(forces+4*ii);
(fvec4(forces+4*jj)-result).store(forces+4*jj);
}
}
void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
deltaR = posJ-posI;
......@@ -502,3 +588,18 @@ float CpuNonbondedForce::erfcApprox(float x) {
return coeff1*erfcTable[index] + coeff2*erfcTable[index+1];
}
float CpuNonbondedForce::exptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*exptermsTable[index] + coeff2*exptermsTable[index+1];
}
float CpuNonbondedForce::dExptermsApprox(float x) {
float x1 = x*exptermsDXInv;
int index = min((int) floor(x1), NUM_TABLE_POINTS);
float coeff2 = x1-index;
float coeff1 = 1.0f-coeff2;
return coeff1*dExptermsTable[index] + coeff2*dExptermsTable[index+1];
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment