Commit 6160b726 authored by Lee-Ping Wang's avatar Lee-Ping Wang
Browse files

Merge branch 'master' of https://github.com/SimTk/openmm

parents 8df54762 d59a34fb
...@@ -105,15 +105,16 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 ) ...@@ -105,15 +105,16 @@ ELSE( CMAKE_SIZEOF_VOID_P EQUAL 8 )
SET( LIB64 ) SET( LIB64 )
ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 ) ENDIF( CMAKE_SIZEOF_VOID_P EQUAL 8 )
# Build universal binaries compatible with OS X 10.5 # Build universal binaries compatible with OS X 10.7
IF (APPLE AND NOT CMAKE_OSX_DEPLOYMENT_TARGET) IF (APPLE AND NOT CMAKE_OSX_DEPLOYMENT_TARGET)
SET (CMAKE_OSX_ARCHITECTURES "i386;x86_64" CACHE STRING "The processor architectures to build for" FORCE) SET (CMAKE_OSX_ARCHITECTURES "i386;x86_64" CACHE STRING "The processor architectures to build for" FORCE)
SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.5" CACHE STRING "The minimum version of OS X to support" FORCE) SET (CMAKE_OSX_DEPLOYMENT_TARGET "10.7" CACHE STRING "The minimum version of OS X to support" FORCE)
ENDIF (APPLE AND NOT CMAKE_OSX_DEPLOYMENT_TARGET) ENDIF (APPLE AND NOT CMAKE_OSX_DEPLOYMENT_TARGET)
# Improve the linking behavior of Mac libraries # Improve the linking behavior of Mac libraries
IF (APPLE) IF (APPLE)
SET (CMAKE_INSTALL_NAME_DIR "@rpath") SET (CMAKE_INSTALL_NAME_DIR "@rpath")
SET_PROPERTY(GLOBAL PROPERTY COMPILE_FLAGS "-stdlib=libc++ -mmacosx-version-min=10.7")
ENDIF (APPLE) ENDIF (APPLE)
IF(UNIX AND NOT CMAKE_BUILD_TYPE) IF(UNIX AND NOT CMAKE_BUILD_TYPE)
......
...@@ -16,6 +16,15 @@ find_path(OPENCL_INCLUDE_DIR ...@@ -16,6 +16,15 @@ find_path(OPENCL_INCLUDE_DIR
PATH_SUFFIXES "include" PATH_SUFFIXES "include"
NO_DEFAULT_PATH NO_DEFAULT_PATH
) )
# On Macs, look inside the platform SDK
if(DEFINED CMAKE_OSX_SYSROOT)
find_path(OPENCL_INCLUDE_DIR
NAMES opencl.h opencl.h
PATHS
"${CMAKE_OSX_SYSROOT}/System/Library/Frameworks/OpenCL.framework/Headers"
NO_DEFAULT_PATH
)
endif(DEFINED CMAKE_OSX_SYSROOT)
# As a last resort, look in default system areas followed by other possible locations # As a last resort, look in default system areas followed by other possible locations
find_path(OPENCL_INCLUDE_DIR find_path(OPENCL_INCLUDE_DIR
NAMES OpenCL/opencl.h CL/opencl.h NAMES OpenCL/opencl.h CL/opencl.h
......
This diff is collapsed.
This diff is collapsed.
from __future__ import print_function
import simtk.openmm.app as app
import simtk.openmm as mm
import simtk.unit as unit
import sys
from datetime import datetime
from optparse import OptionParser
def timeIntegration(context, steps):
"""Integrate a Context for a specified number of steps, then return how many seconds it took."""
context.getIntegrator().step(5) # Make sure everything is fully initialized
context.getState(getEnergy=True)
start = datetime.now()
context.getIntegrator().step(steps)
context.getState(getEnergy=True)
end = datetime.now()
elapsed = end -start
return elapsed.seconds + elapsed.microseconds*1e-6
def runOneTest(testName, options):
"""Perform a single benchmarking simulation."""
explicit = (testName in ('rf', 'pme', 'amoebapme'))
amoeba = (testName in ('amoebagk', 'amoebapme'))
hydrogenMass = None
print()
if amoeba:
print('Test: %s (epsilon=%g)' % (testName, options.epsilon))
elif testName == 'pme':
print('Test: pme (cutoff=%g)' % options.cutoff)
else:
print('Test: %s' % testName)
platform = mm.Platform.getPlatformByName(options.platform)
# Create the System.
if amoeba:
constraints = None
epsilon = float(options.epsilon)
if epsilon == 0:
polarization = 'direct'
else:
polarization = 'mutual'
if explicit:
ff = app.ForceField('amoeba2009.xml')
pdb = app.PDBFile('5dfr_solv-cube_equil.pdb')
cutoff = 0.7*unit.nanometers
vdwCutoff = 0.9*unit.nanometers
system = ff.createSystem(pdb.topology, nonbondedMethod=app.PME, nonbondedCutoff=cutoff, vdwCutoff=vdwCutoff, constraints=constraints, ewaldErrorTolerance=0.00075, mutualInducedTargetEpsilon=epsilon, polarization=polarization)
else:
ff = app.ForceField('amoeba2009.xml', 'amoeba2009_gk.xml')
pdb = app.PDBFile('5dfr_minimized.pdb')
cutoff = 2.0*unit.nanometers
vdwCutoff = 1.2*unit.nanometers
system = ff.createSystem(pdb.topology, nonbondedMethod=app.NoCutoff, constraints=constraints, mutualInducedTargetEpsilon=epsilon, polarization=polarization)
dt = 0.001*unit.picoseconds
else:
if explicit:
ff = app.ForceField('amber99sb.xml', 'tip3p.xml')
pdb = app.PDBFile('5dfr_solv-cube_equil.pdb')
if testName == 'pme':
method = app.PME
cutoff = options.cutoff
else:
method = app.CutoffPeriodic
cutoff = 1*unit.nanometers
else:
ff = app.ForceField('amber99sb.xml', 'amber99_obc.xml')
pdb = app.PDBFile('5dfr_minimized.pdb')
method = app.CutoffNonPeriodic
cutoff = 2*unit.nanometers
if options.heavy:
dt = 0.005*unit.picoseconds
constraints = app.AllBonds
hydrogenMass = 4*unit.amu
else:
dt = 0.002*unit.picoseconds
constraints = app.HBonds
hydrogenMass = None
system = ff.createSystem(pdb.topology, nonbondedMethod=method, nonbondedCutoff=cutoff, constraints=constraints, hydrogenMass=hydrogenMass)
print('Step Size: %g fs' % dt.value_in_unit(unit.femtoseconds))
properties = {}
if options.device is not None:
if platform.getName() == 'CUDA':
properties['CudaDeviceIndex'] = options.device
elif platform.getName() == 'OpenCL':
properties['OpenCLDeviceIndex'] = options.device
if options.precision is not None:
if platform.getName() == 'CUDA':
properties['CudaPrecision'] = options.precision
elif platform.getName() == 'OpenCL':
properties['OpenCLPrecision'] = options.device
# Run the simulation.
integ = mm.LangevinIntegrator(300*unit.kelvin, 91*(1/unit.picoseconds), dt)
integ.setConstraintTolerance(1e-5)
if len(properties) > 0:
context = mm.Context(system, integ, platform, properties)
else:
context = mm.Context(system, integ, platform)
context.setPositions(pdb.positions)
context.setVelocitiesToTemperature(300*unit.kelvin)
steps = 20
while True:
time = timeIntegration(context, steps)
if time >= 0.5*options.seconds:
break
if time < 0.5:
steps = int(steps*1.0/time) # Integrate enough steps to get a reasonable estimate for how many we'll need.
else:
steps = int(steps*options.seconds/time)
print('Integrated %d steps in %g seconds' % (steps, time))
print('%g ns/day' % (dt*steps*86400/time).value_in_unit(unit.nanoseconds))
# Parse the command line options.
parser = OptionParser()
platformNames = [mm.Platform.getPlatform(i).getName() for i in range(mm.Platform.getNumPlatforms())]
parser.add_option('--platform', dest='platform', choices=platformNames, help='name of the platform to benchmark')
parser.add_option('--test', dest='test', choices=('gbsa', 'rf', 'pme', 'amoebagk', 'amoebapme'), help='the test to perform: gbsa, rf, pme, amoebagk, or amoebapme [default: all]')
parser.add_option('--pme-cutoff', default='0.9', dest='cutoff', type='float', help='direct space cutoff for PME in nm [default: 0.9]')
parser.add_option('--seconds', default='60', dest='seconds', type='float', help='target simulation length in seconds [default: 60]')
parser.add_option('--mutual-epsilon', default='1e-4', dest='epsilon', type='float', help='mutual induced epsilon for AMOEBA [default: 1e-4]')
parser.add_option('--heavy-hydrogens', action='store_true', default=False, dest='heavy', help='repartition mass to allow a larger time step')
parser.add_option('--device', default=None, dest='device', help='device index for CUDA or OpenCL')
parser.add_option('--precision', default='single', dest='precision', choices=('single', 'mixed', 'double'), help='precision mode for CUDA or OpenCL: single, mixed, or double [default: single]')
(options, args) = parser.parse_args()
if len(args) > 0:
parser.error('Unknown argument: '+args[0])
if options.platform is None:
parser.error('No platform specified')
print('Platform:', options.platform)
if options.platform in ('CUDA', 'OpenCL'):
print('Precision:', options.precision)
if options.device is not None:
print('Device:', options.device)
# Run the simulations.
if options.test is None:
for test in ('gbsa', 'rf', 'pme', 'amoebagk', 'amoebapme'):
try:
runOneTest(test, options)
except Exception as ex:
print('Test failed: %s' % ex.message)
else:
runOneTest(options.test, options)
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#include "Vec3.h" #include "Vec3.h"
#include <map> #include <map>
#include <string>
#include <vector> #include <vector>
#include "internal/windowsExport.h" #include "internal/windowsExport.h"
......
...@@ -160,6 +160,21 @@ public: ...@@ -160,6 +160,21 @@ public:
ivec4 operator==(ivec4 other) const { ivec4 operator==(ivec4 other) const {
return _mm_cmpeq_epi32(val, other); return _mm_cmpeq_epi32(val, other);
} }
ivec4 operator!=(ivec4 other) const {
return _mm_xor_si128(*this==other, _mm_set1_epi32(0xFFFFFFFF));
}
ivec4 operator>(ivec4 other) const {
return _mm_cmpgt_epi32(val, other);
}
ivec4 operator<(ivec4 other) const {
return _mm_cmplt_epi32(val, other);
}
ivec4 operator>=(ivec4 other) const {
return _mm_xor_si128(_mm_cmplt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
}
ivec4 operator<=(ivec4 other) const {
return _mm_xor_si128(_mm_cmpgt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
}
operator fvec4() const; operator fvec4() const;
}; };
...@@ -230,6 +245,10 @@ static inline ivec4 abs(ivec4 v) { ...@@ -230,6 +245,10 @@ static inline ivec4 abs(ivec4 v) {
return ivec4(_mm_abs_epi32(v.val)); return ivec4(_mm_abs_epi32(v.val));
} }
static inline bool any(ivec4 v) {
return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
}
// Mathematical operators involving a scalar and a vector. // Mathematical operators involving a scalar and a vector.
static inline fvec4 operator+(float v1, fvec4 v2) { static inline fvec4 operator+(float v1, fvec4 v2) {
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_GBSAOBC_FORCE_H__
#define OPENMM_CPU_GBSAOBC_FORCE_H__
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include <set>
#include <utility>
#include <vector>
namespace OpenMM {
class CpuGBSAOBCForce {
public:
class ComputeTask;
CpuGBSAOBCForce();
/**
* Set the force to use a cutoff.
*
* @param distance the cutoff distance
*/
void setUseCutoff(float distance);
/**
*
* Set the force to use periodic boundary conditions. This requires that a cutoff has
* already been set, and the smallest side of the periodic box is at least twice the cutoff
* distance.
*
* @param boxSize the X, Y, and Z widths of the periodic box
*/
void setPeriodic(float* periodicBoxSize);
/**
* Set the solute dielectric constant.
*/
void setSoluteDielectric(float dielectric);
/**
* Set the solvent dielectric constant.
*/
void setSolventDielectric(float dielectric);
/**
* Get the per-particle parameters (offset radius, scaled radius).
*/
const std::vector<std::pair<float, float> >& getParticleParameters() const;
/**
* Set the per-particle parameters (offset radius, scaled radius).
*/
void setParticleParameters(const std::vector<std::pair<float, float> >& params);
/**
*
* Calculate LJ Coulomb pair ixn
*
* @param posq atom coordinates and charges
* @param forces force array (forces added)
* @param totalEnergy total energy
* @param threads the thread pool to use
*/
void computeForce(const std::vector<float>& posq, std::vector<std::vector<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/**
* This routine contains the code executed by each thread.
*/
void threadComputeForce(ThreadPool& threads, int threadIndex);
private:
bool cutoff;
bool periodic;
float periodicBoxSize[3];
float cutoffDistance, soluteDielectric, solventDielectric;
std::vector<std::pair<float, float> > particleParams;
std::vector<float> bornRadii;
std::vector<std::vector<float> > threadBornForces;
std::vector<float> obcChain;
std::vector<double> threadEnergy;
std::vector<float> logTable;
float logDX, logDXInv;
// The following variables are used to make information accessible to the individual threads.
float const* posq;
std::vector<std::vector<float> >* threadForce;
bool includeEnergy;
static const int NUM_TABLE_POINTS;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Evaluate log(x) using a lookup table for speed.
*/
fvec4 fastLog(fvec4 x);
};
} // namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_GBSAOBC_FORCE_H__
...@@ -32,22 +32,64 @@ ...@@ -32,22 +32,64 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "CpuPlatform.h" #include "CpuGBSAOBCForce.h"
#include "CpuNeighborList.h" #include "CpuNeighborList.h"
#include "CpuNonbondedForce.h" #include "CpuNonbondedForce.h"
#include "CpuPlatform.h"
#include "openmm/kernels.h" #include "openmm/kernels.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
namespace OpenMM { namespace OpenMM {
/**
* This kernel is invoked at the beginning and end of force and energy computations. It gives the
* Platform a chance to clear buffers and do other initialization at the beginning, and to do any
* necessary work at the end to determine the final results.
*/
class CpuCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public:
CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context);
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
*/
void initialize(const System& system);
/**
* This is called at the beginning of each force/energy computation, before calcForcesAndEnergy() has been called on
* any ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
*/
void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
/**
* This is called at the end of each force/energy computation, after calcForcesAndEnergy() has been called on
* every ForceImpl.
*
* @param context the context in which to execute this kernel
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
private:
CpuPlatform::PlatformData& data;
Kernel referenceKernel;
};
/** /**
* This kernel is invoked by NonbondedForce to calculate the forces acting on the system. * This kernel is invoked by NonbondedForce to calculate the forces acting on the system.
*/ */
class CpuCalcNonbondedForceKernel : public CalcNonbondedForceKernel { class CpuCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public: public:
CpuCalcNonbondedForceKernel(std::string name, const Platform& platform) : CalcNonbondedForceKernel(name, platform), CpuCalcNonbondedForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false) { data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false) {
} }
~CpuCalcNonbondedForceKernel(); ~CpuCalcNonbondedForceKernel();
/** /**
...@@ -77,6 +119,7 @@ public: ...@@ -77,6 +119,7 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force); void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
private: private:
class PmeIO; class PmeIO;
CpuPlatform::PlatformData& data;
int numParticles, num14; int numParticles, num14;
int **bonded14IndexArray; int **bonded14IndexArray;
double **bonded14ParamArray; double **bonded14ParamArray;
...@@ -85,16 +128,51 @@ private: ...@@ -85,16 +128,51 @@ private:
bool useSwitchingFunction, useOptimizedPme, hasInitializedPme; bool useSwitchingFunction, useOptimizedPme, hasInitializedPme;
std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<std::pair<float, float> > particleParams; std::vector<std::pair<float, float> > particleParams;
std::vector<float> posq;
std::vector<float> forces;
std::vector<RealVec> lastPositions; std::vector<RealVec> lastPositions;
NonbondedMethod nonbondedMethod; NonbondedMethod nonbondedMethod;
CpuNeighborList neighborList; CpuNeighborList neighborList;
CpuNonbondedForce nonbonded; CpuNonbondedForce nonbonded;
ThreadPool threads;
Kernel optimizedPme; Kernel optimizedPme;
}; };
/**
* This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
*/
class CpuCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
public:
CpuCalcGBSAOBCForceKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcGBSAOBCForceKernel(name, platform),
data(data) {
}
~CpuCalcGBSAOBCForceKernel();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the GBSAOBCForce this kernel will be used for
*/
void initialize(const System& system, const GBSAOBCForce& force);
/**
* Execute the kernel to calculate the forces and/or energy.
*
* @param context the context in which to execute this kernel
* @param includeForces true if forces should be calculated
* @param includeEnergy true if the energy should be calculated
* @return the potential energy due to the force
*/
double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
/**
* Copy changed parameters over to a context.
*
* @param context the context to copy parameters to
* @param force the GBSAOBCForce to copy the parameters from
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
CpuPlatform::PlatformData& data;
std::vector<std::pair<float, float> > particleParams;
CpuGBSAOBCForce obc;
};
} // namespace OpenMM } // namespace OpenMM
#endif /*OPENMM_CPUKERNELS_H_*/ #endif /*OPENMM_CPUKERNELS_H_*/
......
...@@ -124,7 +124,7 @@ class CpuNonbondedForce { ...@@ -124,7 +124,7 @@ class CpuNonbondedForce {
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions, const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<RealVec>& forces, float* totalEnergy) const; std::vector<RealVec>& forces, double* totalEnergy) const;
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -143,7 +143,7 @@ class CpuNonbondedForce { ...@@ -143,7 +143,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters, void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, float* forces, float* totalEnergy, ThreadPool& threads); const std::vector<std::set<int> >& exclusions, std::vector<std::vector<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/** /**
* This routine contains the code executed by each thread. * This routine contains the code executed by each thread.
...@@ -165,7 +165,6 @@ private: ...@@ -165,7 +165,6 @@ private:
int meshDim[3]; int meshDim[3];
std::vector<float> ewaldScaleTable; std::vector<float> ewaldScaleTable;
float ewaldDX, ewaldDXInv; float ewaldDX, ewaldDXInv;
std::vector<std::vector<float> > threadForce;
std::vector<double> threadEnergy; std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads. // The following variables are used to make information accessible to the individual threads.
int numberOfAtoms; int numberOfAtoms;
...@@ -173,6 +172,7 @@ private: ...@@ -173,6 +172,7 @@ private:
RealVec const* atomCoordinates; RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters; std::pair<float, float> const* atomParameters;
std::set<int> const* exclusions; std::set<int> const* exclusions;
std::vector<std::vector<float> >* threadForce;
bool includeEnergy; bool includeEnergy;
static const float TWO_OVER_SQRT_PI; static const float TWO_OVER_SQRT_PI;
......
...@@ -33,7 +33,10 @@ ...@@ -33,7 +33,10 @@
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
#include "ReferencePlatform.h" #include "ReferencePlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportCpu.h" #include "windowsExportCpu.h"
#include <map>
namespace OpenMM { namespace OpenMM {
...@@ -43,6 +46,7 @@ namespace OpenMM { ...@@ -43,6 +46,7 @@ namespace OpenMM {
class OPENMM_EXPORT_CPU CpuPlatform : public ReferencePlatform { class OPENMM_EXPORT_CPU CpuPlatform : public ReferencePlatform {
public: public:
class PlatformData;
CpuPlatform(); CpuPlatform();
const std::string& getName() const { const std::string& getName() const {
static const std::string name = "CPU"; static const std::string name = "CPU";
...@@ -51,6 +55,24 @@ public: ...@@ -51,6 +55,24 @@ public:
double getSpeed() const; double getSpeed() const;
bool supportsDoublePrecision() const; bool supportsDoublePrecision() const;
static bool isProcessorSupported(); static bool isProcessorSupported();
void contextCreated(ContextImpl& context, const std::map<std::string, std::string>& properties) const;
void contextDestroyed(ContextImpl& context) const;
/**
* We cannot use the standard mechanism for platform data, because that is already used by the superclass.
* Instead, we maintain a table of ContextImpls to PlatformDatas.
*/
static PlatformData& getPlatformData(ContextImpl& context);
private:
static std::map<ContextImpl*, PlatformData*> contextData;
};
class CpuPlatform::PlatformData {
public:
PlatformData(int numParticles);
std::vector<float> posq;
std::vector<std::vector<float> > threadForce;
ThreadPool threads;
bool isPeriodic;
}; };
} // namespace OpenMM } // namespace OpenMM
......
SET_SOURCE_FILES_PROPERTIES(${SOURCE_FILES} PROPERTIES COMPILE_FLAGS "-msse4.1") GET_PROPERTY(COMPILE_FLAGS GLOBAL PROPERTY COMPILE_FLAGS)
SET_SOURCE_FILES_PROPERTIES(${SOURCE_FILES} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -msse4.1")
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) IF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...@@ -7,6 +8,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ...@@ -7,6 +8,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME}) SET(MAIN_OPENMM_LIB ${OPENMM_LIBRARY_NAME})
ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug) ENDIF (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB}) TARGET_LINK_LIBRARIES(${SHARED_TARGET} ${MAIN_OPENMM_LIB} ${PTHREADS_LIB})
SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_CPU_BUILDING_SHARED_LIBRARY") SET_TARGET_PROPERTIES(${SHARED_TARGET} PROPERTIES COMPILE_FLAGS "-DOPENMM_CPU_BUILDING_SHARED_LIBRARY" LINK_FLAGS "${COMPILE_FLAGS}")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET}) INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${SHARED_TARGET})
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
#include <cmath>
using namespace std;
using namespace OpenMM;
const int CpuGBSAOBCForce::NUM_TABLE_POINTS = 1025;
class CpuGBSAOBCForce::ComputeTask : public ThreadPool::Task {
public:
ComputeTask(CpuGBSAOBCForce& owner) : owner(owner) {
}
void execute(ThreadPool& threads, int threadIndex) {
owner.threadComputeForce(threads, threadIndex);
}
CpuGBSAOBCForce& owner;
};
CpuGBSAOBCForce::CpuGBSAOBCForce() : cutoff(false), periodic(false) {
logDX = 0.5/NUM_TABLE_POINTS;
logDXInv = 1.0f/logDX;
vector<double> x(NUM_TABLE_POINTS+1);
vector<double> y(NUM_TABLE_POINTS+1);
vector<double> deriv;
for (int i = 0; i < NUM_TABLE_POINTS+1; i++) {
x[i] = 0.5+i*0.5/NUM_TABLE_POINTS;
y[i] = log(x[i]);
}
SplineFitter::createNaturalSpline(x, y, deriv);
logTable.resize(4*NUM_TABLE_POINTS);
for (int i = 0; i < NUM_TABLE_POINTS; i++) {
logTable[4*i] = (float) y[i];
logTable[4*i+1] = (float) y[i+1];
logTable[4*i+2] = (float) (deriv[i]*logDX*logDX/6);
logTable[4*i+3] = (float) (deriv[i+1]*logDX*logDX/6);
}
}
void CpuGBSAOBCForce::setUseCutoff(float distance) {
cutoff = true;
cutoffDistance = distance;
}
void CpuGBSAOBCForce::setPeriodic(float* periodicBoxSize) {
periodic = true;
this->periodicBoxSize[0] = periodicBoxSize[0];
this->periodicBoxSize[1] = periodicBoxSize[1];
this->periodicBoxSize[2] = periodicBoxSize[2];
}
void CpuGBSAOBCForce::setSoluteDielectric(float dielectric) {
soluteDielectric = dielectric;
}
void CpuGBSAOBCForce::setSolventDielectric(float dielectric) {
solventDielectric = dielectric;
}
const std::vector<std::pair<float, float> >& CpuGBSAOBCForce::getParticleParameters() const {
return particleParams;
}
void CpuGBSAOBCForce::setParticleParameters(const std::vector<std::pair<float, float> >& params) {
particleParams = params;
bornRadii.resize(params.size()+3);
obcChain.resize(params.size()+3);
}
void CpuGBSAOBCForce::computeForce(const std::vector<float>& posq, vector<vector<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads.
this->posq = &posq[0];
this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL);
int numThreads = threads.getNumThreads();
threadEnergy.resize(numThreads);
threadBornForces.resize(numThreads);
for (int i = 0; i < numThreads; i++)
threadBornForces[i].resize(particleParams.size()+3);
// Signal the threads to start running and wait for them to finish.
ComputeTask task(*this);
threads.execute(task);
threads.waitForThreads(); // Compute Born radii
threads.resumeThreads();
threads.waitForThreads(); // Compute surface area term
threads.resumeThreads();
threads.waitForThreads(); // First loop
threads.resumeThreads();
threads.waitForThreads(); // Second loop
// Combine the energies from all the threads.
if (totalEnergy != NULL) {
double energy = 0;
for (int i = 0; i < numThreads; i++)
energy += threadEnergy[i];
*totalEnergy += (float) energy;
}
}
void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
int numParticles = particleParams.size();
int numThreads = threads.getNumThreads();
const float dielectricOffset = 0.009;
const float alphaObc = 1.0f;
const float betaObc = 0.8f;
const float gammaObc = 4.85f;
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
int start = (threadIndex*numParticles)/numThreads;
int end = ((threadIndex+1)*numParticles)/numThreads;
// Calculate Born radii
for (int blockStart = start; blockStart < end; blockStart += 4) {
int numInBlock = min(4, end-blockStart);
ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
float atomRadius[4], atomx[4], atomy[4], atomz[4];
int blockMask[4] = {0, 0, 0, 0};
for (int i = 0; i < numInBlock; i++) {
int atomIndex = blockStart+i;
atomRadius[i] = particleParams[atomIndex].first;
atomx[i] = posq[4*atomIndex];
atomy[i] = posq[4*atomIndex+1];
atomz[i] = posq[4*atomIndex+2];
blockMask[i] = 0xFFFFFFFF;
}
fvec4 offsetRadiusI(atomRadius);
fvec4 radiusIInverse = 1.0f/offsetRadiusI;
fvec4 x(atomx);
fvec4 y(atomy);
fvec4 z(atomz);
ivec4 mask(blockMask);
float sum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
for (int atomJ = 0; atomJ < numParticles; atomJ++) {
fvec4 posJ(posq+4*atomJ);
fvec4 dx, dy, dz, r2;
getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
ivec4 include = mask & (blockAtomIndex != ivec4(atomJ));
if (cutoff)
include = include & (r2 < cutoffDistance*cutoffDistance);
if (!any(include))
continue;
fvec4 r = sqrt(r2);
float scaledRadiusJ = particleParams[atomJ].second;
float scaledRadiusJ2 = scaledRadiusJ*scaledRadiusJ;
fvec4 rScaledRadiusJ = r + scaledRadiusJ;
include = include & (offsetRadiusI < rScaledRadiusJ);
fvec4 l_ij = 1.0f/max(offsetRadiusI, abs(r-scaledRadiusJ));
fvec4 u_ij = 1.0f/rScaledRadiusJ;
fvec4 l_ij2 = l_ij*l_ij;
fvec4 u_ij2 = u_ij*u_ij;
fvec4 rInverse = 1.0f/r;
fvec4 r2Inverse = rInverse*rInverse;
fvec4 logRatio = fastLog(u_ij/l_ij);
fvec4 term = l_ij - u_ij + 0.25f*r*(u_ij2 - l_ij2) + (0.5f*rInverse*logRatio) + (0.25f*scaledRadiusJ*scaledRadiusJ*rInverse)*(l_ij2 - u_ij2);
for (int j = 0; j < 4; j++) {
if (include[j]) {
sum[j] += term[j];
if (offsetRadiusI[j] < scaledRadiusJ-r[j])
sum[j] += 2.0f*(radiusIInverse[j]-l_ij[j]);
}
}
}
for (int i = 0; i < numInBlock; i++) {
int atomIndex = blockStart+i;
sum[i] *= 0.5f*atomRadius[i];
float sum2 = sum[i]*sum[i];
float sum3 = sum[i]*sum2;
float tanhSum = tanh(alphaObc*sum[i] - betaObc*sum2 + gammaObc*sum3);
float radiusI = atomRadius[i] + dielectricOffset;
bornRadii[atomIndex] = 1.0f/(1.0f/atomRadius[i] - tanhSum/radiusI);
obcChain[atomIndex] = atomRadius[i]*(alphaObc - 2.0f*betaObc*sum[i] + 3.0f*gammaObc*sum2);
obcChain[atomIndex] = (1.0f - tanhSum*tanhSum)*obcChain[atomIndex]/radiusI;
}
}
threads.syncThreads();
// Calculate ACE surface area term.
const float probeRadius = 0.14f;
const float surfaceAreaFactor = 28.3919551;
double energy = 0.0;
vector<float>& bornForces = threadBornForces[threadIndex];
for (int i = 0; i < numParticles; i++)
bornForces[i] = 0.0f;
for (int atomI = start; atomI < end; atomI++) {
if (bornRadii[atomI] > 0) {
float radiusI = particleParams[atomI].first + dielectricOffset;
float r = radiusI + probeRadius;
float ratio6 = powf(radiusI/bornRadii[atomI], 6.0f);
float saTerm = surfaceAreaFactor*r*r*ratio6;
energy += saTerm;
bornForces[atomI] = -6.0f*saTerm/bornRadii[atomI];
}
else
bornForces[atomI] = 0.0f;
}
threads.syncThreads();
// First loop of Born energy computation.
float* forces = &(*threadForce)[threadIndex][0];
float preFactor;
if (soluteDielectric != 0.0f && solventDielectric != 0.0f)
preFactor = ONE_4PI_EPS0*((1.0f/solventDielectric) - (1.0f/soluteDielectric));
else
preFactor = 0.0f;
for (int blockStart = start; blockStart < end; blockStart += 4) {
int numInBlock = min(4, end-blockStart);
ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
float atomCharge[4], atomx[4], atomy[4], atomz[4];
int blockMask[4] = {0, 0, 0, 0};
fvec4 blockAtomForce[4];
for (int i = 0; i < numInBlock; i++) {
int atomIndex = blockStart+i;
atomx[i] = posq[4*atomIndex];
atomy[i] = posq[4*atomIndex+1];
atomz[i] = posq[4*atomIndex+2];
atomCharge[i] = preFactor*posq[4*atomIndex+3];
blockMask[i] = 0xFFFFFFFF;
blockAtomForce[i] = 0.0f;
}
fvec4 radii(&bornRadii[blockStart]);
fvec4 x(atomx);
fvec4 y(atomy);
fvec4 z(atomz);
fvec4 partialChargeI(atomCharge);
ivec4 mask(blockMask);
for (int atomJ = blockStart; atomJ < numParticles; atomJ++) {
fvec4 posJ(posq+4*atomJ);
fvec4 dx, dy, dz, r2;
getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
ivec4 include = mask & (blockAtomIndex <= ivec4(atomJ));
if (cutoff)
include = include & (r2 < cutoffDistance*cutoffDistance);
if (!any(include))
continue;
fvec4 r = sqrt(r2);
fvec4 alpha2_ij = radii*bornRadii[atomJ];
fvec4 D_ij = r2/(4.0f*alpha2_ij);
fvec4 expTerm(exp(-D_ij[0]), exp(-D_ij[1]), exp(-D_ij[2]), exp(-D_ij[3]));
fvec4 denominator2 = r2 + alpha2_ij*expTerm;
fvec4 denominator = sqrt(denominator2);
fvec4 Gpol = (partialChargeI*posJ[3])/denominator;
fvec4 dGpol_dr = -Gpol*(1.0f - 0.25f*expTerm)/denominator2;
fvec4 dGpol_dalpha2_ij = -0.5f*Gpol*expTerm*(1.0f + D_ij)/denominator2;
fvec4 result[4] = {dx*dGpol_dr, dy*dGpol_dr, dz*dGpol_dr, 0.0f};
transpose(result[0], result[1], result[2], result[3]);
fvec4 atomForce(forces+4*atomJ);
for (int j = 0; j < 4; j++) {
if (include[j]) {
float termEnergy = Gpol[j];
if (blockStart+j != atomJ) {
if (cutoff)
termEnergy -= partialChargeI[j]*posJ[3]/cutoffDistance;
bornForces[atomJ] += dGpol_dalpha2_ij[j]*radii[j];
blockAtomForce[j] -= result[j];
(fvec4(forces+4*atomJ)+result[j]).store(forces+4*atomJ);
}
else
termEnergy *= 0.5f;
energy += termEnergy;
bornForces[blockStart+j] += dGpol_dalpha2_ij[j]*bornRadii[atomJ];
}
}
}
for (int i = 0; i < numInBlock; i++) {
int atomIndex = blockStart+i;
(fvec4(forces+4*atomIndex)+blockAtomForce[i]).store(forces+4*atomIndex);
}
}
threads.syncThreads();
// Second loop of Born energy computation.
for (int blockStart = start; blockStart < end; blockStart += 4) {
fvec4 bornForce(0.0f);
for (int i = 0; i < numThreads; i++)
bornForce += fvec4(&threadBornForces[i][blockStart]);
fvec4 radii(&bornRadii[blockStart]);
bornForce *= radii*radii*fvec4(&obcChain[blockStart]);
int numInBlock = min(4, end-blockStart);
ivec4 blockAtomIndex(blockStart, blockStart+1, blockStart+2, blockStart+3);
float atomRadius[4], atomx[4], atomy[4], atomz[4];
int blockMask[4] = {0, 0, 0, 0};
fvec4 blockAtomForce[4];
for (int i = 0; i < numInBlock; i++) {
int atomIndex = blockStart+i;
atomRadius[i] = particleParams[atomIndex].first;
atomx[i] = posq[4*atomIndex];
atomy[i] = posq[4*atomIndex+1];
atomz[i] = posq[4*atomIndex+2];
blockMask[i] = 0xFFFFFFFF;
blockAtomForce[i] = 0.0f;
}
fvec4 offsetRadiusI(atomRadius);
fvec4 x(atomx);
fvec4 y(atomy);
fvec4 z(atomz);
ivec4 mask(blockMask);
for (int atomJ = 0; atomJ < numParticles; atomJ++) {
fvec4 posJ(posq+4*atomJ);
fvec4 dx, dy, dz, r2;
getDeltaR(posJ, x, y, z, dx, dy, dz, r2, periodic, boxSize, invBoxSize);
ivec4 include = mask & (blockAtomIndex != ivec4(atomJ));
if (cutoff)
include = include & (r2 < cutoffDistance*cutoffDistance);
if (!any(include))
continue;
fvec4 r = sqrt(r2);
float scaledRadiusJ = particleParams[atomJ].second;
float scaledRadiusJ2 = scaledRadiusJ*scaledRadiusJ;
fvec4 rScaledRadiusJ = r + scaledRadiusJ;
include = include & (offsetRadiusI < rScaledRadiusJ);
fvec4 l_ij = 1.0f/max(offsetRadiusI, abs(r-scaledRadiusJ));
fvec4 u_ij = 1.0f/rScaledRadiusJ;
fvec4 l_ij2 = l_ij*l_ij;
fvec4 u_ij2 = u_ij*u_ij;
fvec4 rInverse = 1.0f/r;
fvec4 r2Inverse = rInverse*rInverse;
fvec4 logRatio = fastLog(u_ij/l_ij);
fvec4 t3 = 0.125f*(1.0f + scaledRadiusJ2*r2Inverse)*(l_ij2 - u_ij2) + 0.25f*logRatio*r2Inverse;
fvec4 de = bornForce*t3*rInverse;
fvec4 result[4] = {dx*de, dy*de, dz*de, 0.0f};
transpose(result[0], result[1], result[2], result[3]);
fvec4 atomForce(forces+4*atomJ);
for (int j = 0; j < 4; j++) {
if (include[j]) {
blockAtomForce[j] += result[j];
atomForce -= result[j];
}
}
atomForce.store(forces+4*atomJ);
}
for (int i = 0; i < numInBlock; i++) {
int atomIndex = blockStart+i;
(fvec4(forces+4*atomIndex)+blockAtomForce[i]).store(forces+4*atomIndex);
}
}
threadEnergy[threadIndex] = energy;
}
void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4& y, const fvec4& z, fvec4& dx, fvec4& dy, fvec4& dz, fvec4& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0];
dy = y-posI[1];
dz = z-posI[2];
if (periodic) {
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
r2 = dx*dx + dy*dy + dz*dz;
}
fvec4 CpuGBSAOBCForce::fastLog(fvec4 x) {
// Evaluate log(x) using a lookup table for speed.
float y[4];
fvec4 x1 = (x-0.5f)*logDXInv;
ivec4 index = floor(x1);
fvec4 coeff[4];
coeff[1] = x1-index;
coeff[0] = 1.0f-coeff[1];
coeff[2] = coeff[0]*coeff[0]*coeff[0]-coeff[0];
coeff[3] = coeff[1]*coeff[1]*coeff[1]-coeff[1];
transpose(coeff[0], coeff[1], coeff[2], coeff[3]);
static float maxdiff = 0.0f;
for (int i = 0; i < 4; i++) {
if (index[i] >= 0 && index[i] < NUM_TABLE_POINTS)
y[i] = dot4(coeff[i], fvec4(&logTable[4*index[i]]));
else
y[i] = logf(x[i]);
}
return fvec4(y);
}
...@@ -38,8 +38,12 @@ ...@@ -38,8 +38,12 @@
using namespace OpenMM; using namespace OpenMM;
KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const { KernelImpl* CpuKernelFactory::createKernelImpl(std::string name, const Platform& platform, ContextImpl& context) const {
ReferencePlatform::PlatformData& data = *static_cast<ReferencePlatform::PlatformData*>(context.getPlatformData()); CpuPlatform::PlatformData& data = CpuPlatform::getPlatformData(context);
if (name == CalcForcesAndEnergyKernel::Name())
return new CpuCalcForcesAndEnergyKernel(name, platform, data, context);
if (name == CalcNonbondedForceKernel::Name()) if (name == CalcNonbondedForceKernel::Name())
return new CpuCalcNonbondedForceKernel(name, platform); return new CpuCalcNonbondedForceKernel(name, platform, data);
if (name == CalcGBSAOBCForceKernel::Name())
return new CpuCalcGBSAOBCForceKernel(name, platform, data);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '") + name + "'").c_str()); throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '") + name + "'").c_str());
} }
...@@ -31,11 +31,14 @@ ...@@ -31,11 +31,14 @@
#include "CpuKernels.h" #include "CpuKernels.h"
#include "ReferenceBondForce.h" #include "ReferenceBondForce.h"
#include "ReferenceKernelFactory.h"
#include "ReferenceKernels.h"
#include "ReferenceLJCoulomb14.h" #include "ReferenceLJCoulomb14.h"
#include "openmm/Context.h" #include "openmm/Context.h"
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#include "openmm/internal/ContextImpl.h" #include "openmm/internal/ContextImpl.h"
#include "openmm/internal/NonbondedForceImpl.h" #include "openmm/internal/NonbondedForceImpl.h"
#include "openmm/internal/vectorize.h"
#include "RealVec.h" #include "RealVec.h"
using namespace OpenMM; using namespace OpenMM;
...@@ -61,6 +64,67 @@ static RealVec& extractBoxSize(ContextImpl& context) { ...@@ -61,6 +64,67 @@ static RealVec& extractBoxSize(ContextImpl& context) {
return *(RealVec*) data->periodicBoxSize; return *(RealVec*) data->periodicBoxSize;
} }
CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CpuPlatform::PlatformData& data, ContextImpl& context) :
CalcForcesAndEnergyKernel(name, platform), data(data) {
// Create a Reference platform version of this kernel.
ReferenceKernelFactory referenceFactory;
referenceKernel = Kernel(referenceFactory.createKernelImpl(name, platform, context));
}
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().beginComputation(context, includeForce, includeEnergy, groups);
// Convert the positions to single precision and apply periodic boundary conditions
vector<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context);
RealVec boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
int numParticles = context.getSystem().getNumParticles();
if (data.isPeriodic)
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j];
double base = floor(x/boxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base);
}
else
for (int i = 0; i < numParticles; i++) {
posq[4*i] = (float) posData[i][0];
posq[4*i+1] = (float) posData[i][1];
posq[4*i+2] = (float) posData[i][2];
}
// Clear the forces.
fvec4 zero(0.0f);
for (int i = 0; i < (int) data.threadForce.size(); i++)
for (int j = 0; j < numParticles; j++)
zero.store(&data.threadForce[i][j*4]);
}
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
// Sum the forces from all the threads.
int numParticles = context.getSystem().getNumParticles();
int numThreads = data.threads.getNumThreads();
vector<RealVec>& forceData = extractForces(context);
for (int i = 0; i < numParticles; i++) {
fvec4 f(0.0f);
for (int j = 0; j < numThreads; j++)
f += fvec4(&data.threadForce[j][4*i]);
forceData[i][0] += f[0];
forceData[i][1] += f[1];
forceData[i][2] += f[2];
}
return referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().finishComputation(context, includeForce, includeEnergy, groups);
}
class CpuCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO { class CpuCalcNonbondedForceKernel::PmeIO : public CalcPmeReciprocalForceKernel::IO {
public: public:
PmeIO(float* posq, float* force, int numParticles) : posq(posq), force(force), numParticles(numParticles) { PmeIO(float* posq, float* force, int numParticles) : posq(posq), force(force), numParticles(numParticles) {
...@@ -97,8 +161,6 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -97,8 +161,6 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
// Identify which exceptions are 1-4 interactions. // Identify which exceptions are 1-4 interactions.
numParticles = force.getNumParticles(); numParticles = force.getNumParticles();
posq.resize(4*numParticles, 0);
forces.resize(4*numParticles, 0);
exclusions.resize(numParticles); exclusions.resize(numParticles);
vector<int> nb14s; vector<int> nb14s;
for (int i = 0; i < force.getNumExceptions(); i++) { for (int i = 0; i < force.getNumExceptions(); i++) {
...@@ -125,7 +187,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -125,7 +187,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
posq[4*i+3] = (float) charge; data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -173,6 +235,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond ...@@ -173,6 +235,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
lastPositions.resize(numParticles, Vec3(1e10, 1e10, 1e10)); lastPositions.resize(numParticles, Vec3(1e10, 1e10, 1e10));
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME);
} }
double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) { double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
...@@ -192,32 +255,14 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -192,32 +255,14 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
} }
} }
vector<float>& posq = data.posq;
vector<RealVec>& posData = extractPositions(context); vector<RealVec>& posData = extractPositions(context);
vector<RealVec>& forceData = extractForces(context); vector<RealVec>& forceData = extractForces(context);
RealVec boxSize = extractBoxSize(context); RealVec boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]}; float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
double energy = ewaldSelfEnergy; double energy = ewaldSelfEnergy;
bool periodic = (nonbondedMethod == CutoffPeriodic);
bool ewald = (nonbondedMethod == Ewald); bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME); bool pme = (nonbondedMethod == PME);
// Convert the positions to single precision.
if (periodic || ewald || pme)
for (int i = 0; i < numParticles; i++)
for (int j = 0; j < 3; j++) {
RealOpenMM x = posData[i][j];
double base = floor(x/boxSize[j])*boxSize[j];
posq[4*i+j] = (float) (x-base);
}
else
for (int i = 0; i < numParticles; i++) {
posq[4*i] = (float) posData[i][0];
posq[4*i+1] = (float) posData[i][1];
posq[4*i+2] = (float) posData[i][2];
}
for (int i = 0; i < 4*numParticles; i++)
forces[i] = 0.0f;
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
// Determine whether we need to recompute the neighbor list. // Determine whether we need to recompute the neighbor list.
...@@ -260,12 +305,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -260,12 +305,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
} }
} }
if (needRecompute) { if (needRecompute) {
neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, periodic || ewald || pme, nonbondedCutoff+padding, threads); neighborList.computeNeighborList(numParticles, posq, exclusions, floatBoxSize, data.isPeriodic, nonbondedCutoff+padding, data.threads);
lastPositions = posData; lastPositions = posData;
} }
nonbonded.setUseCutoff(nonbondedCutoff, neighborList, rfDielectric); nonbonded.setUseCutoff(nonbondedCutoff, neighborList, rfDielectric);
} }
if (periodic || ewald || pme) { if (data.isPeriodic) {
double minAllowedSize = 1.999999*nonbondedCutoff; double minAllowedSize = 1.999999*nonbondedCutoff;
if (boxSize[0] < minAllowedSize || boxSize[1] < minAllowedSize || boxSize[2] < minAllowedSize) if (boxSize[0] < minAllowedSize || boxSize[1] < minAllowedSize || boxSize[2] < minAllowedSize)
throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff."); throw OpenMMException("The periodic box size has decreased to less than twice the nonbonded cutoff.");
...@@ -277,12 +322,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -277,12 +322,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded.setUsePME(ewaldAlpha, gridSize); nonbonded.setUsePME(ewaldAlpha, gridSize);
if (useSwitchingFunction) if (useSwitchingFunction)
nonbonded.setUseSwitchingFunction(switchingDistance); nonbonded.setUseSwitchingFunction(switchingDistance);
float nonbondedEnergy = 0; double nonbondedEnergy = 0;
if (includeDirect) if (includeDirect)
nonbonded.calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, &forces[0], includeEnergy ? &nonbondedEnergy : NULL, threads); nonbonded.calculateDirectIxn(numParticles, &posq[0], posData, particleParams, exclusions, data.threadForce, includeEnergy ? &nonbondedEnergy : NULL, data.threads);
if (includeReciprocal) { if (includeReciprocal) {
if (useOptimizedPme) { if (useOptimizedPme) {
PmeIO io(&posq[0], &forces[0], numParticles); PmeIO io(&posq[0], &data.threadForce[0][0], numParticles);
Vec3 periodicBoxSize(boxSize[0], boxSize[1], boxSize[2]); Vec3 periodicBoxSize(boxSize[0], boxSize[1], boxSize[2]);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, periodicBoxSize, includeEnergy); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().beginComputation(io, periodicBoxSize, includeEnergy);
optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io); optimizedPme.getAs<CalcPmeReciprocalForceKernel>().finishComputation(io);
...@@ -291,16 +336,11 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo ...@@ -291,16 +336,11 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL); nonbonded.calculateReciprocalIxn(numParticles, &posq[0], posData, particleParams, exclusions, forceData, includeEnergy ? &nonbondedEnergy : NULL);
} }
energy += nonbondedEnergy; energy += nonbondedEnergy;
for (int i = 0; i < numParticles; i++) {
forceData[i][0] += forces[4*i];
forceData[i][1] += forces[4*i+1];
forceData[i][2] += forces[4*i+2];
}
if (includeDirect) { if (includeDirect) {
ReferenceBondForce refBondForce; ReferenceBondForce refBondForce;
ReferenceLJCoulomb14 nonbonded14; ReferenceLJCoulomb14 nonbonded14;
refBondForce.calculateForce(num14, bonded14IndexArray, posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14); refBondForce.calculateForce(num14, bonded14IndexArray, posData, bonded14ParamArray, forceData, includeEnergy ? &energy : NULL, nonbonded14);
if (periodic || ewald || pme) if (data.isPeriodic)
energy += dispersionCoefficient/(boxSize[0]*boxSize[1]*boxSize[2]); energy += dispersionCoefficient/(boxSize[0]*boxSize[1]*boxSize[2]);
} }
return energy; return energy;
...@@ -326,7 +366,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -326,7 +366,7 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
for (int i = 0; i < numParticles; ++i) { for (int i = 0; i < numParticles; ++i) {
double charge, radius, depth; double charge, radius, depth;
force.getParticleParameters(i, charge, radius, depth); force.getParticleParameters(i, charge, radius, depth);
posq[4*i+3] = (float) charge; data.posq[4*i+3] = (float) charge;
particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth))); particleParams[i] = make_pair((float) (0.5*radius), (float) (2.0*sqrt(depth)));
sumSquaredCharges += charge*charge; sumSquaredCharges += charge*charge;
} }
...@@ -351,3 +391,52 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, ...@@ -351,3 +391,52 @@ void CpuCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
if (force.getUseDispersionCorrection() && (method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::PME)) if (force.getUseDispersionCorrection() && (method == NonbondedForce::CutoffPeriodic || method == NonbondedForce::Ewald || method == NonbondedForce::PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force); dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
} }
CpuCalcGBSAOBCForceKernel::~CpuCalcGBSAOBCForceKernel() {
}
void CpuCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
int numParticles = system.getNumParticles();
particleParams.resize(numParticles);
for (int i = 0; i < numParticles; ++i) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
data.posq[4*i+3] = (float) charge;
radius -= 0.009;
particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius));
}
obc.setParticleParameters(particleParams);
obc.setSolventDielectric((float) force.getSolventDielectric());
obc.setSoluteDielectric((float) force.getSoluteDielectric());
if (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff)
obc.setUseCutoff((float) force.getCutoffDistance());
data.isPeriodic = (force.getNonbondedMethod() == GBSAOBCForce::CutoffPeriodic);
}
double CpuCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (data.isPeriodic) {
RealVec& boxSize = extractBoxSize(context);
float floatBoxSize[3] = {(float) boxSize[0], (float) boxSize[1], (float) boxSize[2]};
obc.setPeriodic(floatBoxSize);
}
double energy = 0.0;
obc.computeForce(data.posq, data.threadForce, includeEnergy ? &energy : NULL, data.threads);
return energy;
}
void CpuCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force) {
int numParticles = force.getNumParticles();
if (numParticles != obc.getParticleParameters().size())
throw OpenMMException("updateParametersInContext: The number of particles has changed");
// Record the values.
for (int i = 0; i < numParticles; ++i) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
data.posq[4*i+3] = (float) charge;
radius -= 0.009;
particleParams[i] = make_pair((float) radius, (float) (scalingFactor*radius));
}
obc.setParticleParameters(particleParams);
}
...@@ -164,8 +164,12 @@ public: ...@@ -164,8 +164,12 @@ public:
float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]); float refineCutoff = maxDistance-max(max(blockWidth[0], blockWidth[1]), blockWidth[2]);
float refineCutoffSquared = refineCutoff*refineCutoff; float refineCutoffSquared = refineCutoff*refineCutoff;
int dIndexX = min(nx/2, int((maxDistance+blockWidth[0])/voxelSizeX)+1); // How may voxels away do we have to look? int dIndexX = int((maxDistance+blockWidth[0])/voxelSizeX)+1; // How may voxels away do we have to look?
int dIndexY = min(ny/2, int((maxDistance+blockWidth[1])/voxelSizeY)+1); int dIndexY = int((maxDistance+blockWidth[1])/voxelSizeY)+1;
if (usePeriodic) {
dIndexX = min(nx/2, dIndexX);
dIndexY = min(ny/2, dIndexY);
}
float centerPos[4]; float centerPos[4];
blockCenter.store(centerPos); blockCenter.store(centerPos);
VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos); VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
...@@ -202,7 +206,9 @@ public: ...@@ -202,7 +206,9 @@ public:
float dz = maxDistance+blockWidth[2]; float dz = maxDistance+blockWidth[2];
dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy)); dz = sqrtf(max(0.0f, dz*dz-dx*dx-dy*dy));
bool needPeriodic = (voxelIndex.x != x || voxelIndex.y != y || centerPos[2]-dz < 0.0f || centerPos[2]+dz > periodicBoxSize[2]); bool needPeriodic = (centerPos[0]-blockWidth[0] < maxDistance || centerPos[0]+blockWidth[0] > periodicBoxSize[0]-maxDistance ||
centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
centerPos[2]-dz < 0.0f || centerPos[2]+dz > periodicBoxSize[2]);
int rangeStart[2]; int rangeStart[2];
int rangeEnd[2]; int rangeEnd[2];
rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz); rangeStart[0] = findLowerBound(voxelIndex.x, voxelIndex.y, centerPos[2]-dz);
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include <string.h>
#include <complex> #include <complex>
#include "SimTKOpenMMCommon.h" #include "SimTKOpenMMCommon.h"
...@@ -179,7 +178,7 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() { ...@@ -179,7 +178,7 @@ void CpuNonbondedForce::tabulateEwaldScaleFactor() {
void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates,
const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions, const vector<pair<float, float> >& atomParameters, const vector<set<int> >& exclusions,
vector<RealVec>& forces, float* totalEnergy) const { vector<RealVec>& forces, double* totalEnergy) const {
typedef std::complex<float> d_complex; typedef std::complex<float> d_complex;
static const float epsilon = 1.0; static const float epsilon = 1.0;
...@@ -292,7 +291,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c ...@@ -292,7 +291,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters, void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const vector<RealVec>& atomCoordinates, const vector<pair<float, float> >& atomParameters,
const vector<set<int> >& exclusions, float* forces, float* totalEnergy, ThreadPool& threads) { const vector<set<int> >& exclusions, vector<vector<float> >& threadForce, double* totalEnergy, ThreadPool& threads) {
// Record the parameters for the threads. // Record the parameters for the threads.
this->numberOfAtoms = numberOfAtoms; this->numberOfAtoms = numberOfAtoms;
...@@ -300,9 +299,9 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -300,9 +299,9 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
this->atomCoordinates = &atomCoordinates[0]; this->atomCoordinates = &atomCoordinates[0];
this->atomParameters = &atomParameters[0]; this->atomParameters = &atomParameters[0];
this->exclusions = &exclusions[0]; this->exclusions = &exclusions[0];
this->threadForce = &threadForce;
includeEnergy = (totalEnergy != NULL); includeEnergy = (totalEnergy != NULL);
threadEnergy.resize(threads.getNumThreads()); threadEnergy.resize(threads.getNumThreads());
threadForce.resize(threads.getNumThreads());
// Signal the threads to start running and wait for them to finish. // Signal the threads to start running and wait for them to finish.
...@@ -310,21 +309,15 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const ...@@ -310,21 +309,15 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
threads.execute(task); threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// Combine the results from all the threads. // Combine the energies from all the threads.
if (totalEnergy != NULL) {
double directEnergy = 0; double directEnergy = 0;
int numThreads = threads.getNumThreads(); int numThreads = threads.getNumThreads();
for (int i = 0; i < numThreads; i++) for (int i = 0; i < numThreads; i++)
directEnergy += threadEnergy[i]; directEnergy += threadEnergy[i];
for (int i = 0; i < numberOfAtoms; i++) { *totalEnergy += directEnergy;
fvec4 f(forces+4*i);
for (int j = 0; j < numThreads; j++)
f += fvec4(&threadForce[j][4*i]);
f.store(forces+4*i);
} }
if (totalEnergy != NULL)
*totalEnergy += (float) directEnergy;
} }
void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex) { void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex) {
...@@ -333,10 +326,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex ...@@ -333,10 +326,7 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
int numThreads = threads.getNumThreads(); int numThreads = threads.getNumThreads();
threadEnergy[threadIndex] = 0; threadEnergy[threadIndex] = 0;
double* energyPtr = (includeEnergy ? &threadEnergy[threadIndex] : NULL); double* energyPtr = (includeEnergy ? &threadEnergy[threadIndex] : NULL);
threadForce[threadIndex].resize(4*numberOfAtoms, 0.0f); float* forces = &(*threadForce)[threadIndex][0];
float* forces = &threadForce[threadIndex][0];
for (int i = 0; i < 4*numberOfAtoms; i++)
forces[i] = 0.0f;
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0); fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0); fvec4 invBoxSize((1/periodicBoxSize[0]), (1/periodicBoxSize[1]), (1/periodicBoxSize[2]), 0);
if (ewald || pme) { if (ewald || pme) {
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "openmm/internal/hardware.h" #include "openmm/internal/hardware.h"
using namespace OpenMM; using namespace OpenMM;
using namespace std;
extern "C" OPENMM_EXPORT_CPU void registerPlatforms() { extern "C" OPENMM_EXPORT_CPU void registerPlatforms() {
// Only register this platform if the CPU supports SSE 4.1. // Only register this platform if the CPU supports SSE 4.1.
...@@ -43,9 +44,13 @@ extern "C" OPENMM_EXPORT_CPU void registerPlatforms() { ...@@ -43,9 +44,13 @@ extern "C" OPENMM_EXPORT_CPU void registerPlatforms() {
Platform::registerPlatform(new CpuPlatform()); Platform::registerPlatform(new CpuPlatform());
} }
map<ContextImpl*, CpuPlatform::PlatformData*> CpuPlatform::contextData;
CpuPlatform::CpuPlatform() { CpuPlatform::CpuPlatform() {
CpuKernelFactory* factory = new CpuKernelFactory(); CpuKernelFactory* factory = new CpuKernelFactory();
registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
registerKernelFactory(CalcNonbondedForceKernel::Name(), factory); registerKernelFactory(CalcNonbondedForceKernel::Name(), factory);
registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
} }
double CpuPlatform::getSpeed() const { double CpuPlatform::getSpeed() const {
...@@ -67,3 +72,28 @@ bool CpuPlatform::isProcessorSupported() { ...@@ -67,3 +72,28 @@ bool CpuPlatform::isProcessorSupported() {
} }
return false; return false;
} }
void CpuPlatform::contextCreated(ContextImpl& context, const map<string, string>& properties) const {
ReferencePlatform::contextCreated(context, properties);
PlatformData* data = new PlatformData(context.getSystem().getNumParticles());
contextData[&context] = data;
}
void CpuPlatform::contextDestroyed(ContextImpl& context) const {
PlatformData* data = contextData[&context];
delete data;
contextData.erase(&context);
}
CpuPlatform::PlatformData& CpuPlatform::getPlatformData(ContextImpl& context) {
return *contextData[&context];
}
CpuPlatform::PlatformData::PlatformData(int numParticles) {
posq.resize(4*numParticles);
int numThreads = threads.getNumThreads();
threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++)
threadForce[i].resize(4*numParticles);
isPeriodic = false;
}
...@@ -11,6 +11,7 @@ IF( INCLUDE_SERIALIZATION ) ...@@ -11,6 +11,7 @@ IF( INCLUDE_SERIALIZATION )
INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include) INCLUDE_DIRECTORIES(${OPENMM_DIR}/serialization/include)
SET( SHARED_OPENMM_SERIALIZATION "OpenMMSerialization" ) SET( SHARED_OPENMM_SERIALIZATION "OpenMMSerialization" )
ENDIF( INCLUDE_SERIALIZATION ) ENDIF( INCLUDE_SERIALIZATION )
GET_PROPERTY(COMPILE_FLAGS GLOBAL PROPERTY COMPILE_FLAGS)
# Automatically create tests using files named "Test*.cpp" # Automatically create tests using files named "Test*.cpp"
FILE(GLOB TEST_PROGS "*Test*.cpp") FILE(GLOB TEST_PROGS "*Test*.cpp")
...@@ -20,6 +21,7 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -20,6 +21,7 @@ FOREACH(TEST_PROG ${TEST_PROGS})
# Link with shared library # Link with shared library
ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG}) ADD_EXECUTABLE(${TEST_ROOT} ${TEST_PROG})
TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET}) TARGET_LINK_LIBRARIES(${TEST_ROOT} ${SHARED_TARGET})
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}" LINK_FLAGS "${COMPILE_FLAGS}")
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single) ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT} single)
ENDFOREACH(TEST_PROG ${TEST_PROGS}) ENDFOREACH(TEST_PROG ${TEST_PROGS})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CPU implementation of GBSAOBCForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CpuPlatform.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "openmm/NonbondedForce.h"
#include "SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using namespace OpenMM;
using namespace std;
const double TOL = 1e-5;
void testSingleParticle() {
CpuPlatform platform;
System system;
system.addParticle(2.0);
LangevinIntegrator integrator(0, 0.1, 0.01);
GBSAOBCForce* forceField = new GBSAOBCForce();
forceField->addParticle(0.5, 0.15, 1);
system.addForce(forceField);
Context context(system, integrator, platform);
vector<Vec3> positions(1);
positions[0] = Vec3(0, 0, 0);
context.setPositions(positions);
State state = context.getState(State::Energy);
double bornRadius = 0.15-0.009; // dielectric offset
double eps0 = EPSILON0;
double bornEnergy = (-0.5*0.5/(8*PI_M*eps0))*(1.0/forceField->getSoluteDielectric()-1.0/forceField->getSolventDielectric())/bornRadius;
double extendedRadius = 0.15+0.14; // probe radius
double nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.15/bornRadius, 6.0); // Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
// Change the parameters and see if it is still correct.
forceField->setParticleParameters(0, 0.4, 0.25, 1);
forceField->updateParametersInContext(context);
state = context.getState(State::Energy);
bornRadius = 0.25-0.009; // dielectric offset
bornEnergy = (-0.4*0.4/(8*PI_M*eps0))*(1.0/forceField->getSoluteDielectric()-1.0/forceField->getSolventDielectric())/bornRadius;
extendedRadius = bornRadius+0.14;
nonpolarEnergy = CAL2JOULE*PI_M*0.0216*(10*extendedRadius)*(10*extendedRadius)*std::pow(0.25/bornRadius, 6.0);
ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
}
void testCutoffAndPeriodic() {
CpuPlatform platform;
System system;
system.addParticle(1.0);
system.addParticle(1.0);
LangevinIntegrator integrator(0, 0.1, 0.01);
GBSAOBCForce* gbsa = new GBSAOBCForce();
NonbondedForce* nonbonded = new NonbondedForce();
gbsa->addParticle(-1, 0.15, 1);
nonbonded->addParticle(-1, 1, 0);
gbsa->addParticle(1, 0.15, 1);
nonbonded->addParticle(1, 1, 0);
const double cutoffDistance = 3.0;
const double boxSize = 10.0;
nonbonded->setCutoffDistance(cutoffDistance);
gbsa->setCutoffDistance(cutoffDistance);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
system.addForce(gbsa);
system.addForce(nonbonded);
vector<Vec3> positions(2);
positions[0] = Vec3(0, 0, 0);
positions[1] = Vec3(2, 0, 0);
// Calculate the forces for both cutoff and periodic with two different atom positions.
nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
Context context(system, integrator, platform);
context.setPositions(positions);
State state1 = context.getState(State::Forces);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
context.reinitialize();
context.setPositions(positions);
State state2 = context.getState(State::Forces);
positions[1][0]+= boxSize;
nonbonded->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffNonPeriodic);
context.reinitialize();
context.setPositions(positions);
State state3 = context.getState(State::Forces);
nonbonded->setNonbondedMethod(NonbondedForce::CutoffPeriodic);
gbsa->setNonbondedMethod(GBSAOBCForce::CutoffPeriodic);
context.reinitialize();
context.setPositions(positions);
State state4 = context.getState(State::Forces);
// All forces should be identical, exception state3 which should be zero.
ASSERT_EQUAL_VEC(state1.getForces()[0], state2.getForces()[0], 0.01);
ASSERT_EQUAL_VEC(state1.getForces()[1], state2.getForces()[1], 0.01);
ASSERT_EQUAL_VEC(state1.getForces()[0], state4.getForces()[0], 0.01);
ASSERT_EQUAL_VEC(state1.getForces()[1], state4.getForces()[1], 0.01);
ASSERT_EQUAL_VEC(state3.getForces()[0], Vec3(0, 0, 0), 0.01);
ASSERT_EQUAL_VEC(state3.getForces()[1], Vec3(0, 0, 0), 0.01);
}
void testForce(int numParticles, NonbondedForce::NonbondedMethod method, GBSAOBCForce::NonbondedMethod method2) {
CpuPlatform platform;
ReferencePlatform reference;
System system;
GBSAOBCForce* gbsa = new GBSAOBCForce();
NonbondedForce* nonbonded = new NonbondedForce();
for (int i = 0; i < numParticles; ++i) {
system.addParticle(1.0);
double charge = i%2 == 0 ? -1 : 1;
gbsa->addParticle(charge, 0.15, 1);
nonbonded->addParticle(charge, 1, 0);
}
nonbonded->setNonbondedMethod(method);
gbsa->setNonbondedMethod(method2);
nonbonded->setCutoffDistance(3.0);
gbsa->setCutoffDistance(3.0);
int grid = (int) floor(0.5+pow(numParticles, 1.0/3.0));
if (method == NonbondedForce::CutoffPeriodic) {
double boxSize = (grid+1)*1.1;
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
}
system.addForce(gbsa);
system.addForce(nonbonded);
LangevinIntegrator integrator1(0, 0.1, 0.01);
LangevinIntegrator integrator2(0, 0.1, 0.01);
Context context(system, integrator1, platform);
Context refContext(system, integrator2, reference);
// Set random (but uniformly distributed) positions for all the particles.
vector<Vec3> positions(numParticles);
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
for (int i = 0; i < grid; i++)
for (int j = 0; j < grid; j++)
for (int k = 0; k < grid; k++)
positions[i*grid*grid+j*grid+k] = Vec3(i*1.1, j*1.1, k*1.1);
for (int i = 0; i < numParticles; ++i)
positions[i] = positions[i] + Vec3(0.5*genrand_real2(sfmt), 0.5*genrand_real2(sfmt), 0.5*genrand_real2(sfmt));
context.setPositions(positions);
refContext.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
State refState = refContext.getState(State::Forces | State::Energy);
// Make sure the CPU and Reference platforms agree.
double norm = 0.0;
double diff = 0.0;
for (int i = 0; i < numParticles; ++i) {
Vec3 f = state.getForces()[i];
norm += f[0]*f[0] + f[1]*f[1] + f[2]*f[2];
Vec3 delta = f-refState.getForces()[i];
diff += delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
}
norm = std::sqrt(norm);
diff = std::sqrt(diff);
ASSERT_EQUAL_TOL(0.0, diff, 0.001*norm);
ASSERT_EQUAL_TOL(state.getPotentialEnergy(), refState.getPotentialEnergy(), 1e-3);
// Take a small step in the direction of the energy gradient and see whether the potential energy changes by the expected amount.
// (This doesn't work with cutoffs, since the energy changes discontinuously at the cutoff distance.)
if (method == NonbondedForce::NoCutoff)
{
const double delta = 0.3;
double step = 0.5*delta/norm;
vector<Vec3> positions2(numParticles), positions3(numParticles);
for (int i = 0; i < numParticles; ++i) {
Vec3 p = positions[i];
Vec3 f = state.getForces()[i];
positions2[i] = Vec3(p[0]-f[0]*step, p[1]-f[1]*step, p[2]-f[2]*step);
positions3[i] = Vec3(p[0]+f[0]*step, p[1]+f[1]*step, p[2]+f[2]*step);
}
context.setPositions(positions2);
State state2 = context.getState(State::Energy);
context.setPositions(positions3);
State state3 = context.getState(State::Energy);
ASSERT_EQUAL_TOL(norm, (state2.getPotentialEnergy()-state3.getPotentialEnergy())/delta, 1e-2)
}
}
int main() {
try {
testSingleParticle();
testCutoffAndPeriodic();
for (int i = 5; i < 11; i++) {
testForce(i*i*i, NonbondedForce::NoCutoff, GBSAOBCForce::NoCutoff);
testForce(i*i*i, NonbondedForce::CutoffNonPeriodic, GBSAOBCForce::CutoffNonPeriodic);
testForce(i*i*i, NonbondedForce::CutoffPeriodic, GBSAOBCForce::CutoffPeriodic);
}
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment