Commit c7aa1d00 authored by kyleabeauchamp's avatar kyleabeauchamp
Browse files

Merge remote-tracking branch 'upstream/master' into vagrant

parents aea4e454 f7127d33
language: c language: cpp
compiler:
- clang
install: before_install:
- source tools/ci/install.sh - sudo apt-get update -qq
- export PYTHONUNBUFFERED=true - sudo apt-get install -qq libpcre3 libpcre3-dev gromacs
- sudo apt-get install -qq swig doxygen llvm-3.3
- sudo apt-get install -qq python-numpy python-scipy python-pip
- sudo pip install nose
- export ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer-3.3
script: script:
- export CC="clang++" - cmake -DCMAKE_INSTALL_PREFIX=$HOME/OpenMM .
- source deactivate - make -j2
- conda install --yes conda-build - make -j2 install
- # Build the conda package, testing build before packaging. - sudo make PythonInstall
- conda build tools/conda-recipe - # run all of the tests
- # Install the conda package locally. - ctest -j2 -V
- source activate $python - # get a list of all of the failed tests into this stupid ctest format
- conda install $HOME/miniconda/conda-bld/linux-64/openmm-dev-* - python -c 'fn = "Testing/Temporary/LastTestsFailed.log"; import os; os.path.exists(fn) or exit(0); l = [line.split(":")[0] for line in open(fn)]; triplets = zip(l, l, [","]*len(l)); print "".join(",".join(t) for t in triplets)' > FailedTests.log
- conda list -e - # rerun all of the failed tests
- # Run the Python tests. - if [ -s FailedTests.log ]; then ctest -V -I FailedTests.log; fi;
- pushd . - # run the python tests too
- cd wrappers/python/tests - cd python/tests
- nosetests -vv --processes=-1 --process-timeout=200 - nosetests -vv --processes=-1 --process-timeout=200
- popd
env:
global:
# encrypted BINSTAR_TOKEN for push of dev package to binstar
- secure: Qz3pEYXXFnNQ/WK+15ad4cdbLJvzgCIZRwKD9fLiS3CDO2ldAQWxzaz8RQOwqbFtZUWu7lQpr+GukNJz5p0w18QEto+BxLYG9aW5mjoc+F2vCjyWFjkwnJ/Z/3uBKTcr5x9Y7HKaPGivaJ4BNACifjt7cCpeVJzV6u2+bBgSoHc=
matrix:
- python=2.7 CONDA_PY=27
#- python=3.3 CONDA_PY=33
after_success:
- echo "after_success"
- source tools/ci/after_success.sh
...@@ -651,19 +651,20 @@ For the main force field, OpenMM provides the following options: ...@@ -651,19 +651,20 @@ For the main force field, OpenMM provides the following options:
.. tabularcolumns:: |l|L| .. tabularcolumns:: |l|L|
================= ================================================================================ ===================== ================================================================================
File Force Field File Force Field
================= ================================================================================ ===================== ================================================================================
amber96.xml AMBER96\ :cite:`Kollman1997` amber96.xml AMBER96\ :cite:`Kollman1997`
amber99sb.xml AMBER99\ :cite:`Wang2000` with modified backbone torsions\ :cite:`Hornak2006` amber99sb.xml AMBER99\ :cite:`Wang2000` with modified backbone torsions\ :cite:`Hornak2006`
amber99sbildn.xml AMBER99SB plus improved side chain torsions\ :cite:`Lindorff-Larsen2010` amber99sbildn.xml AMBER99SB plus improved side chain torsions\ :cite:`Lindorff-Larsen2010`
amber99sbnmr.xml AMBER99SB with modifications to fit NMR data\ :cite:`Li2010` amber99sbnmr.xml AMBER99SB with modifications to fit NMR data\ :cite:`Li2010`
amber03.xml AMBER03\ :cite:`Duan2003` amber03.xml AMBER03\ :cite:`Duan2003`
amber10.xml AMBER10 amber10.xml AMBER10
amoeba2009.xml AMOEBA 2009\ :cite:`Ren2002`. This force field is deprecated. It is amoeba2009.xml AMOEBA 2009\ :cite:`Ren2002`. This force field is deprecated. It is
recommended to use AMOEBA 2013 instead. recommended to use AMOEBA 2013 instead.
amoeba2013.xml AMOEBA 2013\ :cite:`Shi2013` amoeba2013.xml AMOEBA 2013\ :cite:`Shi2013`
================= ================================================================================ charmm_polar_2013.xml CHARMM 2013 polarizable force field\ :cite:`Lopes2013`
===================== ================================================================================
The AMBER files do not include parameters for water molecules. This allows you The AMBER files do not include parameters for water molecules. This allows you
...@@ -686,10 +687,10 @@ swm4ndp.xml SWM4-NDP water model\ :cite:`Lamoureux2006` ...@@ -686,10 +687,10 @@ swm4ndp.xml SWM4-NDP water model\ :cite:`Lamoureux2006`
=========== ============================================ =========== ============================================
For the AMOEBA force field, only one explicit water model is currently available For the polarizable force fields (AMOEBA and CHARMM), only one explicit water model
and the water parameters are included in the file :code:`amoeba2009.xml`\ . is currently available and the water parameters are included in the same file as
Also the AMOEBA force field file only includes the parameters for amino acids the macromolecule parameters. Also, the polarizable force fields only include
and ions; nucleic acids will be included in a future release. parameters for amino acids and ions, not for nucleic acids.
If you want to include an implicit solvation model, you can also specify one of If you want to include an implicit solvation model, you can also specify one of
the following files: the following files:
......
...@@ -215,6 +215,17 @@ ...@@ -215,6 +215,17 @@
type = {Journal Article} type = {Journal Article}
} }
@article{Lopes2013,
author = {Lopes, Pedro E. M. and Huang, Jing and Shim, Jihyun and Luo, Yun and Li, Hui and Roux, Benoît and MacKerell, Alexander D.},
title = {Polarizable Force Field for Peptides and Proteins Based on the Classical Drude Oscillator},
journal = {Journal of Chemical Theory and Computation},
volume = {9},
number = {12},
pages = {5430-5449},
year = {2013},
type = {Journal Article}
}
@article{Mahoney2000 @article{Mahoney2000
author = {Mahoney, Michael W. and Jorgensen, William L.}, author = {Mahoney, Michael W. and Jorgensen, William L.},
title = {A five-site model for liquid water and the reproduction of the density anomaly by rigid, nonpolarizable potential functions}, title = {A five-site model for liquid water and the reproduction of the density anomaly by rigid, nonpolarizable potential functions},
......
...@@ -157,6 +157,7 @@ public: ...@@ -157,6 +157,7 @@ public:
* of r, the distance between them, as well as any global and per-particle parameters * of r, the distance between them, as well as any global and per-particle parameters
*/ */
explicit CustomNonbondedForce(const std::string& energy); explicit CustomNonbondedForce(const std::string& energy);
CustomNonbondedForce(const CustomNonbondedForce& rhs); // copy constructor
~CustomNonbondedForce(); ~CustomNonbondedForce();
/** /**
* Get the number of particles for which force field parameters have been defined. * Get the number of particles for which force field parameters have been defined.
...@@ -466,6 +467,7 @@ public: ...@@ -466,6 +467,7 @@ public:
protected: protected:
ForceImpl* createImpl() const; ForceImpl* createImpl() const;
private: private:
// REMEMBER TO UPDATE THE COPY CONSTRUCTOR IF YOU ADD ANY NEW FIELDS !!
class ParticleInfo; class ParticleInfo;
class PerParticleParameterInfo; class PerParticleParameterInfo;
class GlobalParameterInfo; class GlobalParameterInfo;
......
...@@ -59,6 +59,7 @@ class OPENMM_EXPORT TabulatedFunction { ...@@ -59,6 +59,7 @@ class OPENMM_EXPORT TabulatedFunction {
public: public:
virtual ~TabulatedFunction() { virtual ~TabulatedFunction() {
} }
virtual TabulatedFunction* Copy() const = 0;
}; };
/** /**
...@@ -96,6 +97,10 @@ public: ...@@ -96,6 +97,10 @@ public:
* @param max the value of x corresponding to the last element of values * @param max the value of x corresponding to the last element of values
*/ */
void setFunctionParameters(const std::vector<double>& values, double min, double max); void setFunctionParameters(const std::vector<double>& values, double min, double max);
/**
* Create a deep copy of the tabulated function.
*/
Continuous1DFunction* Copy() const;
private: private:
std::vector<double> values; std::vector<double> values;
double min, max; double min, max;
...@@ -151,6 +156,10 @@ public: ...@@ -151,6 +156,10 @@ public:
* @param ymax the value of y corresponding to the last element of values * @param ymax the value of y corresponding to the last element of values
*/ */
void setFunctionParameters(int xsize, int ysize, const std::vector<double>& values, double xmin, double xmax, double ymin, double ymax); void setFunctionParameters(int xsize, int ysize, const std::vector<double>& values, double xmin, double xmax, double ymin, double ymax);
/**
* Create a deep copy of the tabulated function
*/
Continuous2DFunction* Copy() const;
private: private:
std::vector<double> values; std::vector<double> values;
int xsize, ysize; int xsize, ysize;
...@@ -222,6 +231,10 @@ public: ...@@ -222,6 +231,10 @@ public:
* @param zmax the value of z corresponding to the last element of values * @param zmax the value of z corresponding to the last element of values
*/ */
void setFunctionParameters(int xsize, int ysize, int zsize, const std::vector<double>& values, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax); void setFunctionParameters(int xsize, int ysize, int zsize, const std::vector<double>& values, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax);
/**
* Create a deep copy of the tabulated function
*/
Continuous3DFunction* Copy() const;
private: private:
std::vector<double> values; std::vector<double> values;
int xsize, ysize, zsize; int xsize, ysize, zsize;
...@@ -253,6 +266,10 @@ public: ...@@ -253,6 +266,10 @@ public:
* @param values the tabulated values of the function f(x) * @param values the tabulated values of the function f(x)
*/ */
void setFunctionParameters(const std::vector<double>& values); void setFunctionParameters(const std::vector<double>& values);
/**
* Create a deep copy of the tabulated function
*/
Discrete1DFunction* Copy() const;
private: private:
std::vector<double> values; std::vector<double> values;
}; };
...@@ -291,6 +308,10 @@ public: ...@@ -291,6 +308,10 @@ public:
* values[i+xsize*j] = f(i,j). This must be of length xsize*ysize. * values[i+xsize*j] = f(i,j). This must be of length xsize*ysize.
*/ */
void setFunctionParameters(int xsize, int ysize, const std::vector<double>& values); void setFunctionParameters(int xsize, int ysize, const std::vector<double>& values);
/**
* Create a deep copy of the tabulated function
*/
Discrete2DFunction* Copy() const;
private: private:
int xsize, ysize; int xsize, ysize;
std::vector<double> values; std::vector<double> values;
...@@ -333,6 +354,10 @@ public: ...@@ -333,6 +354,10 @@ public:
* values[i+xsize*j+xsize*ysize*k] = f(i,j,k). This must be of length xsize*ysize*zsize. * values[i+xsize*j+xsize*ysize*k] = f(i,j,k). This must be of length xsize*ysize*zsize.
*/ */
void setFunctionParameters(int xsize, int ysize, int zsize, const std::vector<double>& values); void setFunctionParameters(int xsize, int ysize, int zsize, const std::vector<double>& values);
/**
* Create a deep copy of the tabulated function
*/
Discrete3DFunction* Copy() const;
private: private:
int xsize, ysize, zsize; int xsize, ysize, zsize;
std::vector<double> values; std::vector<double> values;
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -67,7 +67,6 @@ public: ...@@ -67,7 +67,6 @@ public:
*/ */
static double calcLongRangeCorrection(const CustomNonbondedForce& force, const Context& context); static double calcLongRangeCorrection(const CustomNonbondedForce& force, const Context& context);
private: private:
class TabulatedFunction;
static double integrateInteraction(Lepton::CompiledExpression& expression, const std::vector<double>& params1, const std::vector<double>& params2, static double integrateInteraction(Lepton::CompiledExpression& expression, const std::vector<double>& params1, const std::vector<double>& params2,
const CustomNonbondedForce& force, const Context& context); const CustomNonbondedForce& force, const Context& context);
const CustomNonbondedForce& owner; const CustomNonbondedForce& owner;
......
...@@ -51,6 +51,23 @@ CustomNonbondedForce::CustomNonbondedForce(const string& energy) : energyExpress ...@@ -51,6 +51,23 @@ CustomNonbondedForce::CustomNonbondedForce(const string& energy) : energyExpress
switchingDistance(-1.0), useSwitchingFunction(false), useLongRangeCorrection(false) { switchingDistance(-1.0), useSwitchingFunction(false), useLongRangeCorrection(false) {
} }
CustomNonbondedForce::CustomNonbondedForce(const CustomNonbondedForce& rhs) {
// Copy everything and deep copy the tabulated functions
energyExpression = rhs.energyExpression;
nonbondedMethod = rhs.nonbondedMethod;
cutoffDistance = rhs.cutoffDistance;
switchingDistance = rhs.switchingDistance;
useSwitchingFunction = rhs.useSwitchingFunction;
useLongRangeCorrection = rhs.useLongRangeCorrection;
parameters = rhs.parameters;
globalParameters = rhs.globalParameters;
particles = rhs.particles;
exclusions = rhs.exclusions;
interactionGroups = rhs.interactionGroups;
for (vector<FunctionInfo>::const_iterator it = rhs.functions.begin(); it != rhs.functions.end(); it++)
functions.push_back(FunctionInfo(it->name, it->function->Copy()));
}
CustomNonbondedForce::~CustomNonbondedForce() { CustomNonbondedForce::~CustomNonbondedForce() {
for (int i = 0; i < (int) functions.size(); i++) for (int i = 0; i < (int) functions.size(); i++)
delete functions[i].function; delete functions[i].function;
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. * * Portions copyright (c) 2008-2014 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
#include "openmm/internal/CustomNonbondedForceImpl.h" #include "openmm/internal/CustomNonbondedForceImpl.h"
#include "openmm/internal/SplineFitter.h" #include "openmm/internal/SplineFitter.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "lepton/CustomFunction.h" #include "ReferenceTabulatedFunction.h"
#include "lepton/ParsedExpression.h" #include "lepton/ParsedExpression.h"
#include "lepton/Parser.h" #include "lepton/Parser.h"
#include <cmath> #include <cmath>
...@@ -137,38 +137,6 @@ void CustomNonbondedForceImpl::updateParametersInContext(ContextImpl& context) { ...@@ -137,38 +137,6 @@ void CustomNonbondedForceImpl::updateParametersInContext(ContextImpl& context) {
kernel.getAs<CalcCustomNonbondedForceKernel>().copyParametersToContext(context, owner); kernel.getAs<CalcCustomNonbondedForceKernel>().copyParametersToContext(context, owner);
} }
class CustomNonbondedForceImpl::TabulatedFunction : public Lepton::CustomFunction {
public:
TabulatedFunction(double min, double max, const vector<double>& values) :
min(min), max(max), values(values) {
int numValues = values.size();
x.resize(numValues);
for (int i = 0; i < numValues; i++)
x[i] = min+i*(max-min)/(numValues-1);
SplineFitter::createNaturalSpline(x, values, derivs);
}
int getNumArguments() const {
return 1;
}
double evaluate(const double* arguments) const {
double t = arguments[0];
if (t < min || t > max)
return 0.0;
return SplineFitter::evaluateSpline(x, values, derivs, t);
}
double evaluateDerivative(const double* arguments, const int* derivOrder) const {
double t = arguments[0];
if (t < min || t > max)
return 0.0;
return SplineFitter::evaluateSplineDerivative(x, values, derivs, t);
}
CustomFunction* clone() const {
return new TabulatedFunction(min, max, values);
}
double min, max;
vector<double> x, values, derivs;
};
double CustomNonbondedForceImpl::calcLongRangeCorrection(const CustomNonbondedForce& force, const Context& context) { double CustomNonbondedForceImpl::calcLongRangeCorrection(const CustomNonbondedForce& force, const Context& context) {
if (force.getNonbondedMethod() == CustomNonbondedForce::NoCutoff || force.getNonbondedMethod() == CustomNonbondedForce::CutoffNonPeriodic) if (force.getNonbondedMethod() == CustomNonbondedForce::NoCutoff || force.getNonbondedMethod() == CustomNonbondedForce::CutoffNonPeriodic)
return 0.0; return 0.0;
...@@ -176,13 +144,8 @@ double CustomNonbondedForceImpl::calcLongRangeCorrection(const CustomNonbondedFo ...@@ -176,13 +144,8 @@ double CustomNonbondedForceImpl::calcLongRangeCorrection(const CustomNonbondedFo
// Parse the energy expression. // Parse the energy expression.
map<string, Lepton::CustomFunction*> functions; map<string, Lepton::CustomFunction*> functions;
for (int i = 0; i < force.getNumFunctions(); i++) { for (int i = 0; i < force.getNumFunctions(); i++)
string name; functions[force.getTabulatedFunctionName(i)] = createReferenceTabulatedFunction(force.getTabulatedFunction(i));
vector<double> values;
double min, max;
force.getFunctionParameters(i, name, values, min, max);
functions[name] = new TabulatedFunction(min, max, values);
}
Lepton::CompiledExpression expression = Lepton::Parser::parse(force.getEnergyFunction(), functions).createCompiledExpression(); Lepton::CompiledExpression expression = Lepton::Parser::parse(force.getEnergyFunction(), functions).createCompiledExpression();
// Identify all particle classes (defined by parameters), and record the class of each particle. // Identify all particle classes (defined by parameters), and record the class of each particle.
......
...@@ -61,6 +61,13 @@ void Continuous1DFunction::setFunctionParameters(const vector<double>& values, d ...@@ -61,6 +61,13 @@ void Continuous1DFunction::setFunctionParameters(const vector<double>& values, d
this->max = max; this->max = max;
} }
Continuous1DFunction* Continuous1DFunction::Copy() const {
vector<double> new_vec(values.size());
for (size_t i = 0; i < values.size(); i++)
new_vec[i] = values[i];
return new Continuous1DFunction(new_vec, min, max);
}
Continuous2DFunction::Continuous2DFunction(int xsize, int ysize, const vector<double>& values, double xmin, double xmax, double ymin, double ymax) { Continuous2DFunction::Continuous2DFunction(int xsize, int ysize, const vector<double>& values, double xmin, double xmax, double ymin, double ymax) {
if (xsize < 2 || ysize < 2) if (xsize < 2 || ysize < 2)
throw OpenMMException("Continuous2DFunction: must have at least two points along each axis"); throw OpenMMException("Continuous2DFunction: must have at least two points along each axis");
...@@ -107,6 +114,13 @@ void Continuous2DFunction::setFunctionParameters(int xsize, int ysize, const vec ...@@ -107,6 +114,13 @@ void Continuous2DFunction::setFunctionParameters(int xsize, int ysize, const vec
this->ymax = ymax; this->ymax = ymax;
} }
Continuous2DFunction* Continuous2DFunction::Copy() const {
vector<double> new_vec(values.size());
for (size_t i = 0; i < values.size(); i++)
new_vec[i] = values[i];
return new Continuous2DFunction(xsize, ysize, new_vec, xmin, xmax, ymin, ymax);
}
Continuous3DFunction::Continuous3DFunction(int xsize, int ysize, int zsize, const vector<double>& values, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax) { Continuous3DFunction::Continuous3DFunction(int xsize, int ysize, int zsize, const vector<double>& values, double xmin, double xmax, double ymin, double ymax, double zmin, double zmax) {
if (xsize < 2 || ysize < 2 || zsize < 2) if (xsize < 2 || ysize < 2 || zsize < 2)
throw OpenMMException("Continuous3DFunction: must have at least two points along each axis"); throw OpenMMException("Continuous3DFunction: must have at least two points along each axis");
...@@ -166,6 +180,14 @@ void Continuous3DFunction::setFunctionParameters(int xsize, int ysize, int zsize ...@@ -166,6 +180,14 @@ void Continuous3DFunction::setFunctionParameters(int xsize, int ysize, int zsize
this->zmax = zmax; this->zmax = zmax;
} }
Continuous3DFunction* Continuous3DFunction::Copy() const {
vector<double> new_vec(values.size());
for (size_t i = 0; i < values.size(); i++)
new_vec[i] = values[i];
return new Continuous3DFunction(xsize, ysize, zsize, new_vec, xmin, xmax, ymin, ymax, zmin, zmax);
}
Discrete1DFunction::Discrete1DFunction(const vector<double>& values) { Discrete1DFunction::Discrete1DFunction(const vector<double>& values) {
this->values = values; this->values = values;
} }
...@@ -178,6 +200,13 @@ void Discrete1DFunction::setFunctionParameters(const vector<double>& values) { ...@@ -178,6 +200,13 @@ void Discrete1DFunction::setFunctionParameters(const vector<double>& values) {
this->values = values; this->values = values;
} }
Discrete1DFunction* Discrete1DFunction::Copy() const {
vector<double> new_vec(values.size());
for (size_t i = 0; i < values.size(); i++)
new_vec[i] = values[i];
return new Discrete1DFunction(new_vec);
}
Discrete2DFunction::Discrete2DFunction(int xsize, int ysize, const vector<double>& values) { Discrete2DFunction::Discrete2DFunction(int xsize, int ysize, const vector<double>& values) {
if (values.size() != xsize*ysize) if (values.size() != xsize*ysize)
throw OpenMMException("Discrete2DFunction: incorrect number of values"); throw OpenMMException("Discrete2DFunction: incorrect number of values");
...@@ -200,6 +229,13 @@ void Discrete2DFunction::setFunctionParameters(int xsize, int ysize, const vecto ...@@ -200,6 +229,13 @@ void Discrete2DFunction::setFunctionParameters(int xsize, int ysize, const vecto
this->values = values; this->values = values;
} }
Discrete2DFunction* Discrete2DFunction::Copy() const {
vector<double> new_vec(values.size());
for (size_t i = 0; i < values.size(); i++)
new_vec[i] = values[i];
return new Discrete2DFunction(xsize, ysize, new_vec);
}
Discrete3DFunction::Discrete3DFunction(int xsize, int ysize, int zsize, const vector<double>& values) { Discrete3DFunction::Discrete3DFunction(int xsize, int ysize, int zsize, const vector<double>& values) {
if (values.size() != xsize*ysize*zsize) if (values.size() != xsize*ysize*zsize)
throw OpenMMException("Discrete3DFunction: incorrect number of values"); throw OpenMMException("Discrete3DFunction: incorrect number of values");
...@@ -224,3 +260,10 @@ void Discrete3DFunction::setFunctionParameters(int xsize, int ysize, int zsize, ...@@ -224,3 +260,10 @@ void Discrete3DFunction::setFunctionParameters(int xsize, int ysize, int zsize,
this->zsize = zsize; this->zsize = zsize;
this->values = values; this->values = values;
} }
Discrete3DFunction* Discrete3DFunction::Copy() const {
vector<double> new_vec(values.size());
for (size_t i = 0; i < values.size(); i++)
new_vec[i] = values[i];
return new Discrete3DFunction(xsize, ysize, zsize, new_vec);
}
...@@ -2271,7 +2271,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo ...@@ -2271,7 +2271,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()); defines["LOCAL_MEMORY_SIZE"] = cu.intToString(max(32, cu.getNonbondedUtilities().getForceThreadBlockSize()));
double cutoff = force.getCutoffDistance(); double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct { typedef struct {
real x, y, z; real x, y, z;
real q; real q;
...@@ -19,7 +17,7 @@ extern "C" __global__ void computeInteractionGroups( ...@@ -19,7 +17,7 @@ extern "C" __global__ void computeInteractionGroups(
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
real energy = 0.0f; real energy = 0.0f;
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
...@@ -86,4 +84,4 @@ extern "C" __global__ void computeInteractionGroups( ...@@ -86,4 +84,4 @@ extern "C" __global__ void computeInteractionGroups(
atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000))); atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
} }
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
} }
\ No newline at end of file
...@@ -2289,7 +2289,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon ...@@ -2289,7 +2289,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()); defines["LOCAL_MEMORY_SIZE"] = cl.intToString(max(32, cl.getNonbondedUtilities().getForceThreadBlockSize()));
double cutoff = force.getCutoffDistance(); double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif #endif
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct { typedef struct {
real x, y, z; real x, y, z;
real q; real q;
...@@ -52,7 +50,7 @@ __kernel void computeInteractionGroups( ...@@ -52,7 +50,7 @@ __kernel void computeInteractionGroups(
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex
real energy = 0.0f; real energy = 0.0f;
__local AtomData localData[THREAD_BLOCK_SIZE]; __local AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
...@@ -127,4 +125,4 @@ __kernel void computeInteractionGroups( ...@@ -127,4 +125,4 @@ __kernel void computeInteractionGroups(
#endif #endif
} }
energyBuffer[get_global_id(0)] += energy; energyBuffer[get_global_id(0)] += energy;
} }
\ No newline at end of file
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "CudaForceInfo.h" #include "CudaForceInfo.h"
#include "CudaKernelSources.h" #include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h" #include "CudaNonbondedUtilities.h"
#include "jama_svd.h"
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
...@@ -796,8 +797,9 @@ private: ...@@ -796,8 +797,9 @@ private:
CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) :
CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), hasInitializedFFT(false), multipolesAreValid(false), CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), hasInitializedFFT(false), multipolesAreValid(false),
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL),
field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
inducedDipole(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL), pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) { pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) {
} }
...@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete inducedDipolePolar; delete inducedDipolePolar;
if (inducedDipoleErrors != NULL) if (inducedDipoleErrors != NULL)
delete inducedDipoleErrors; delete inducedDipoleErrors;
if (prevDipoles != NULL)
delete prevDipoles;
if (prevDipolesPolar != NULL)
delete prevDipolesPolar;
if (prevDipolesGk != NULL)
delete prevDipolesGk;
if (prevDipolesGkPolar != NULL)
delete prevDipolesGkPolar;
if (prevErrors != NULL)
delete prevErrors;
if (diisMatrix != NULL)
delete diisMatrix;
if (diisCoefficients != NULL)
delete diisCoefficients;
if (polarizability != NULL) if (polarizability != NULL)
delete polarizability; delete polarizability;
if (covalentFlags != NULL) if (covalentFlags != NULL)
...@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole"); inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar"); inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors"); inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix");
cu.addAutoclearBuffer(*field); cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar); cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque); cu.addAutoclearBuffer(*torque);
...@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric)); defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
fixedThreadMemory += 4*elementSize; fixedThreadMemory += 4*elementSize;
inducedThreadMemory += 13*elementSize; inducedThreadMemory += 13*elementSize;
prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
} }
int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize(); int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize();
fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory)); fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory));
...@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField"); computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
if (maxInducedIterations > 0) { if (maxInducedIterations > 0) {
defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads); defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleInducedField, defines);
computeInducedFieldKernel = cu.getKernel(module, "computeInducedField"); computeInducedFieldKernel = cu.getKernel(module, "computeInducedField");
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldBySOR"); updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS");
buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
} }
stringstream electrostaticsSource; stringstream electrostaticsSource;
if (usePME) { if (usePME) {
...@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
vector<float2> errors;
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar); cu.clearBuffer(*inducedFieldPolar);
...@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads); cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
void* updateInducedGkFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedField()->getDevicePointer(),
&gkKernel->getInducedFieldPolar()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(),
&gkKernel->getInducedDipolesPolar()->getDevicePointer(), &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedGkFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
}
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
inducedDipoleErrors->download(errors);
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < (int) errors.size(); j++) {
total1 += errors[j].x;
total2 += errors[j].y;
} }
if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon) double maxEpsilon = iterateDipolesByDIIS(i);
if (maxEpsilon < inducedEpsilon)
break; break;
} }
...@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(), void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()}; &inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms()); cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &inducedField->getDevicePointer(), double maxEpsilon = iterateDipolesByDIIS(i);
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), if (maxEpsilon < inducedEpsilon)
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
inducedDipoleErrors->download(errors);
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < (int) errors.size(); j++) {
total1 += errors[j].x;
total2 += errors[j].y;
}
if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon)
break; break;
} }
...@@ -1612,6 +1614,88 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1612,6 +1614,88 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return 0.0; return 0.0;
} }
double CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
void* npt = NULL;
bool trueValue = true, falseValue = false;
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
// Record the dipole and errors into the lists of previous dipoles.
if (gkKernel != NULL) {
void* recordDIISDipolesGkArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedField()->getDevicePointer(),
&gkKernel->getInducedFieldPolar()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer(), &prevDipolesGk->getDevicePointer(),
&prevDipolesGkPolar->getDevicePointer(), &prevErrors->getDevicePointer(), &iteration, &falseValue, &diisMatrix->getDevicePointer()};
cu.executeKernel(recordDIISDipolesKernel, recordDIISDipolesGkArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
}
void* recordDIISDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer(), &prevDipoles->getDevicePointer(),
&prevDipolesPolar->getDevicePointer(), &prevErrors->getDevicePointer(), &iteration, &trueValue, &diisMatrix->getDevicePointer()};
cu.executeKernel(recordDIISDipolesKernel, recordDIISDipolesArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
float2* errors = (float2*) cu.getPinnedBuffer();
inducedDipoleErrors->download(errors, false);
// Determine the coefficients for selecting the new dipoles.
int numPrev = (iteration+1 < MaxPrevDIISDipoles ? iteration+1 : MaxPrevDIISDipoles);
void* buildMatrixArgs[] = {&prevErrors->getDevicePointer(), &iteration, &diisMatrix->getDevicePointer()};
int threadBlocks = min(numPrev, cu.getNumThreadBlocks());
cu.executeKernel(buildMatrixKernel, buildMatrixArgs, threadBlocks*128, 128, 128*elementSize);
vector<float> coefficients(MaxPrevDIISDipoles);
if (iteration == 0)
coefficients[0] = 1;
else {
vector<float> matrix;
diisMatrix->download(matrix);
int rank = numPrev+1;
Array2D<double> b(rank, rank);
b[0][0] = 0;
for (int i = 1; i < rank; i++)
b[i][0] = b[0][i] = -1;
for (int i = 0; i < numPrev; i++)
for (int j = 0; j < numPrev; j++)
b[i+1][j+1] = matrix[i*MaxPrevDIISDipoles+j];
// Solve using SVD. Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case.
JAMA::SVD<double> svd(b);
Array2D<double> u, v;
svd.getU(u);
svd.getV(v);
Array1D<double> s;
svd.getSingularValues(s);
int effectiveRank = svd.rank();
for (int i = 1; i < rank; i++) {
double d = 0;
for (int j = 0; j < effectiveRank; j++)
d -= u[0][j]*v[i][j]/s[j];
coefficients[i-1] = d;
}
}
diisCoefficients->upload(&coefficients[0]);
// Compute the dipoles.
void* updateInducedFieldArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&prevDipoles->getDevicePointer(), &prevDipolesPolar->getDevicePointer(), &diisCoefficients->getDevicePointer(), &numPrev};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
if (gkKernel != NULL) {
void* updateInducedFieldGkArgs[] = {&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&prevDipolesGk->getDevicePointer(), &prevDipolesGkPolar->getDevicePointer(), &diisCoefficients->getDevicePointer(), &numPrev};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldGkArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
}
// Compute the overall error for monitoring convergence.
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < inducedDipoleErrors->getSize(); j++) {
total1 += errors[j].x;
total2 += errors[j].y;
}
return 48.033324*sqrt(max(total1, total2)/cu.getNumAtoms());
}
void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) { void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) {
if (multipolesAreValid) { if (multipolesAreValid) {
int numParticles = cu.getNumAtoms(); int numParticles = cu.getNumAtoms();
......
...@@ -375,6 +375,7 @@ private: ...@@ -375,6 +375,7 @@ private:
const char* getSortKey() const {return "value.y";} const char* getSortKey() const {return "value.y";}
}; };
void initializeScaleFactors(); void initializeScaleFactors();
double iterateDipolesByDIIS(int iteration);
void ensureMultipolesValid(ContextImpl& context); void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments); template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations; int numMultipoles, maxInducedIterations;
...@@ -399,6 +400,13 @@ private: ...@@ -399,6 +400,13 @@ private:
CudaArray* inducedDipole; CudaArray* inducedDipole;
CudaArray* inducedDipolePolar; CudaArray* inducedDipolePolar;
CudaArray* inducedDipoleErrors; CudaArray* inducedDipoleErrors;
CudaArray* prevDipoles;
CudaArray* prevDipolesPolar;
CudaArray* prevDipolesGk;
CudaArray* prevDipolesGkPolar;
CudaArray* prevErrors;
CudaArray* diisMatrix;
CudaArray* diisCoefficients;
CudaArray* polarizability; CudaArray* polarizability;
CudaArray* covalentFlags; CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags; CudaArray* polarizationGroupFlags;
...@@ -419,8 +427,10 @@ private: ...@@ -419,8 +427,10 @@ private:
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel; CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel; CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CUfunction recordDIISDipolesKernel, buildMatrixKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel; CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
static const int MaxPrevDIISDipoles = 20;
}; };
/** /**
......
...@@ -485,7 +485,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ ...@@ -485,7 +485,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors); buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
__syncthreads(); __syncthreads();
for (int offset = 1; offset < blockDim.x; offset *= 2) { for (int offset = 1; offset < blockDim.x; offset *= 2) {
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) { if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x; buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y; buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
...@@ -494,4 +494,115 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ ...@@ -494,4 +494,115 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
} }
if (threadIdx.x == 0) if (threadIdx.x == 0)
errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y); errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
} }
\ No newline at end of file
extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors,
real* __restrict__ prevDipoles, real* __restrict__ prevDipolesPolar, real* __restrict__ prevErrors, int iteration, bool recordPrevErrors, real* __restrict__ matrix) {
extern __shared__ real2 buffer[];
#ifdef USE_EWALD
const real ewaldScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
#else
const real ewaldScale = 0;
#endif
const real fieldScale = 1/(real) 0x100000000;
real sumErrors = 0;
real sumPolarErrors = 0;
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real scale = polarizability[atom];
for (int component = 0; component < 3; component++) {
int dipoleIndex = 3*atom+component;
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
if (iteration >= MAX_PREV_DIIS_DIPOLES) {
// We have filled up the buffer for previous dipoles, so shift them all over by one.
for (int i = 1; i < MAX_PREV_DIIS_DIPOLES; i++) {
int index1 = dipoleIndex+(i-1)*NUM_ATOMS*3;
int index2 = dipoleIndex+i*NUM_ATOMS*3;
prevDipoles[index1] = prevDipoles[index2];
prevDipolesPolar[index1] = prevDipolesPolar[index2];
if (recordPrevErrors)
prevErrors[index1] = prevErrors[index2];
}
}
// Compute the new dipole, and record it along with the error.
real oldDipole = inducedDipole[dipoleIndex];
real oldDipolePolar = inducedDipolePolar[dipoleIndex];
long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale+ewaldScale*oldDipole);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*oldDipolePolar);
int storePrevIndex = dipoleIndex+min(iteration, MAX_PREV_DIIS_DIPOLES-1)*NUM_ATOMS*3;
prevDipoles[storePrevIndex] = newDipole;
prevDipolesPolar[storePrevIndex] = newDipolePolar;
if (recordPrevErrors)
prevErrors[storePrevIndex] = newDipole-oldDipole;
sumErrors += (newDipole-oldDipole)*(newDipole-oldDipole);
sumPolarErrors += (newDipolePolar-oldDipolePolar)*(newDipolePolar-oldDipolePolar);
}
}
// Sum the errors over threads and store the total for this block.
buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
__syncthreads();
for (int offset = 1; offset < blockDim.x; offset *= 2) {
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
}
__syncthreads();
}
if (threadIdx.x == 0)
errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
if (iteration >= MAX_PREV_DIIS_DIPOLES && recordPrevErrors && blockIdx.x == 0) {
// Shift over the existing matrix elements.
for (int i = 0; i < MAX_PREV_DIIS_DIPOLES-1; i++) {
if (threadIdx.x < MAX_PREV_DIIS_DIPOLES-1)
matrix[threadIdx.x+i*MAX_PREV_DIIS_DIPOLES] = matrix[(threadIdx.x+1)+(i+1)*MAX_PREV_DIIS_DIPOLES];
__syncthreads();
}
}
}
extern "C" __global__ void computeDIISMatrix(real* __restrict__ prevErrors, int iteration, real* __restrict__ matrix) {
extern __shared__ real sumBuffer[];
int j = min(iteration, MAX_PREV_DIIS_DIPOLES-1);
for (int i = blockIdx.x; i <= j; i += gridDim.x) {
// All the threads in this thread block work together to compute a single matrix element.
real sum = 0;
for (int index = threadIdx.x; index < NUM_ATOMS*3; index += blockDim.x)
sum += prevErrors[index+i*NUM_ATOMS*3]*prevErrors[index+j*NUM_ATOMS*3];
sumBuffer[threadIdx.x] = sum;
__syncthreads();
for (int offset = 1; offset < blockDim.x; offset *= 2) {
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0)
sumBuffer[threadIdx.x] += sumBuffer[threadIdx.x+offset];
__syncthreads();
}
if (threadIdx.x == 0) {
matrix[i+MAX_PREV_DIIS_DIPOLES*j] = sumBuffer[0];
if (i != j)
matrix[j+MAX_PREV_DIIS_DIPOLES*i] = sumBuffer[0];
}
}
}
extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar,
const real* __restrict__ prevDipoles, const real* __restrict__ prevDipolesPolar, const float* __restrict__ coefficients, int numPrev) {
for (int index = blockIdx.x*blockDim.x + threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
real sum = 0;
real sumPolar = 0;
for (int i = 0; i < numPrev; i++) {
sum += coefficients[i]*prevDipoles[i*3*NUM_ATOMS+index];
sumPolar += coefficients[i]*prevDipolesPolar[i*3*NUM_ATOMS+index];
}
inducedDipole[index] = sum;
inducedDipolePolar[index] = sumPolar;
}
}
...@@ -126,9 +126,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce ...@@ -126,9 +126,9 @@ void CudaCalcDrudeForceKernel::initialize(const System& system, const DrudeForce
double a1 = (atoms[i][2] == -1 ? 1 : aniso12); double a1 = (atoms[i][2] == -1 ? 1 : aniso12);
double a2 = (atoms[i][3] == -1 || atoms[i][4] == -1 ? 1 : aniso34); double a2 = (atoms[i][3] == -1 || atoms[i][4] == -1 ? 1 : aniso34);
double a3 = 3-a1-a2; double a3 = 3-a1-a2;
double k3 = charge*charge/(polarizability*a3); double k3 = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
double k1 = charge*charge/(polarizability*a1) - k3; double k1 = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3;
double k2 = charge*charge/(polarizability*a2) - k3; double k2 = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3;
if (atoms[i][2] == -1) { if (atoms[i][2] == -1) {
atoms[i][2] = 0; atoms[i][2] = 0;
k1 = 0; k1 = 0;
...@@ -197,9 +197,9 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con ...@@ -197,9 +197,9 @@ void CudaCalcDrudeForceKernel::copyParametersToContext(ContextImpl& context, con
double a1 = (p2 == -1 ? 1 : aniso12); double a1 = (p2 == -1 ? 1 : aniso12);
double a2 = (p3 == -1 || p4 == -1 ? 1 : aniso34); double a2 = (p3 == -1 || p4 == -1 ? 1 : aniso34);
double a3 = 3-a1-a2; double a3 = 3-a1-a2;
double k3 = charge*charge/(polarizability*a3); double k3 = ONE_4PI_EPS0*charge*charge/(polarizability*a3);
double k1 = charge*charge/(polarizability*a1) - k3; double k1 = ONE_4PI_EPS0*charge*charge/(polarizability*a1) - k3;
double k2 = charge*charge/(polarizability*a2) - k3; double k2 = ONE_4PI_EPS0*charge*charge/(polarizability*a2) - k3;
if (p2 == -1) if (p2 == -1)
k1 = 0; k1 = 0;
if (p3 == -1 || p4 == -1) if (p3 == -1 || p4 == -1)
......
...@@ -13,7 +13,7 @@ real u = drudeParams.x*r; ...@@ -13,7 +13,7 @@ real u = drudeParams.x*r;
real screening = 1-(1+0.5f*u)*EXP(-u); real screening = 1-(1+0.5f*u)*EXP(-u);
real pairEnergy = drudeParams.y*screening*rInv; real pairEnergy = drudeParams.y*screening*rInv;
energy += pairEnergy; energy += pairEnergy;
real3 f = delta*(pairEnergy*rInv*rInv); real3 f = delta*(drudeParams.y*rInv*rInv*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x));
force1 += f; force1 += f;
force3 -= f; force3 -= f;
...@@ -26,7 +26,7 @@ u = drudeParams.x*r; ...@@ -26,7 +26,7 @@ u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u); screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = -drudeParams.y*screening*rInv; pairEnergy = -drudeParams.y*screening*rInv;
energy += pairEnergy; energy += pairEnergy;
f = delta*(pairEnergy*rInv*rInv); f = delta*(-drudeParams.y*rInv*rInv*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x));
force1 += f; force1 += f;
force4 -= f; force4 -= f;
...@@ -39,7 +39,7 @@ u = drudeParams.x*r; ...@@ -39,7 +39,7 @@ u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u); screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = -drudeParams.y*screening*rInv; pairEnergy = -drudeParams.y*screening*rInv;
energy += pairEnergy; energy += pairEnergy;
f = delta*(pairEnergy*rInv*rInv); f = delta*(-drudeParams.y*rInv*rInv*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x));
force2 += f; force2 += f;
force3 -= f; force3 -= f;
...@@ -52,6 +52,6 @@ u = drudeParams.x*r; ...@@ -52,6 +52,6 @@ u = drudeParams.x*r;
screening = 1-(1+0.5f*u)*EXP(-u); screening = 1-(1+0.5f*u)*EXP(-u);
pairEnergy = drudeParams.y*screening*rInv; pairEnergy = drudeParams.y*screening*rInv;
energy += pairEnergy; energy += pairEnergy;
f = delta*(pairEnergy*rInv*rInv); f = delta*(drudeParams.y*rInv*rInv*(screening*rInv-0.5f*(1+u)*EXP(-u)*drudeParams.x));
force2 += f; force2 += f;
force4 -= f; force4 -= f;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment