Unverified Commit c8981916 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Vectorized CpuCustomNonbondedForce (#3568)

* Began vectorizing CustomNonbondedForce

* Refactored CpuCustomNonbondedForce to support multiple vector sizes

* AVX implementation of CpuCustomNonbondedForce

* Fixed compilation errors
parent fe21d5ee
...@@ -580,7 +580,7 @@ void CompiledVectorExpression::generateJitCode() { ...@@ -580,7 +580,7 @@ void CompiledVectorExpression::generateJitCode() {
CodeHolder code; CodeHolder code;
code.init(runtime.environment()); code.init(runtime.environment());
x86::Compiler c(&code); x86::Compiler c(&code);
FuncNode* funcNode = c.addFunc(FuncSignatureT<double>()); FuncNode* funcNode = c.addFunc(FuncSignatureT<void>());
funcNode->frame().setAvxEnabled(); funcNode->frame().setAvxEnabled();
vector<x86::Ymm> workspaceVar(workspace.size()/width); vector<x86::Ymm> workspaceVar(workspace.size()/width);
for (int i = 0; i < (int) workspaceVar.size(); i++) for (int i = 0; i < (int) workspaceVar.size(); i++)
......
...@@ -36,9 +36,6 @@ ...@@ -36,9 +36,6 @@
* This file defines a collection of functions for querying the specific hardware being used. * This file defines a collection of functions for querying the specific hardware being used.
*/ */
/**
* Get the number of CPU cores available.
*/
#ifdef __APPLE__ #ifdef __APPLE__
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <dlfcn.h> #include <dlfcn.h>
...@@ -56,6 +53,9 @@ ...@@ -56,6 +53,9 @@
#endif #endif
#endif #endif
/**
* Get the number of CPU cores available.
*/
static int getNumProcessors() { static int getNumProcessors() {
#ifdef __APPLE__ #ifdef __APPLE__
int ncpu; int ncpu;
...@@ -95,7 +95,7 @@ static int getNumProcessors() { ...@@ -95,7 +95,7 @@ static int getNumProcessors() {
#else #else
#if !defined(__ANDROID__) && !defined(__PNACL__) && !defined(__PPC__) \ #if !defined(__ANDROID__) && !defined(__PNACL__) && !defined(__PPC__) \
&& !defined(__ARM__) && !defined(__ARM64__) && !defined(__ARM__) && !defined(__ARM64__)
static void cpuid(int cpuInfo[4], int infoType){ static void cpuid(int cpuInfo[4], int infoType) {
#ifdef __LP64__ #ifdef __LP64__
__asm__ __volatile__ ( __asm__ __volatile__ (
"cpuid": "cpuid":
...@@ -119,7 +119,33 @@ static int getNumProcessors() { ...@@ -119,7 +119,33 @@ static int getNumProcessors() {
); );
#endif #endif
} }
#endif #else
static void cpuid(int cpuInfo[4], int infoType) {
cpuInfo[0] = cpuInfo[1] = cpuInfo[2] = 0;
}
#endif
#endif #endif
/**
* Get whether this is an x86 CPU that supports AVX.
*/
static bool isAvxSupported() {
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 28)) != 0);
}
return false;
}
/**
* Get the maximum supported size for vectors in multiples of four bytes. This
* is the number of int or float values that can be contained in a vector.
*/
static int getVectorWidth() {
if (isAvxSupported())
return 8;
return 4;
}
#endif // OPENMM_HARDWARE_H_ #endif // OPENMM_HARDWARE_H_
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
#include "openmm/internal/CompiledExpressionSet.h" #include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h" #include "openmm/internal/vectorize.h"
#include "lepton/CompiledVectorExpression.h"
#include "lepton/ParsedExpression.h"
#include <atomic> #include <atomic>
#include <map> #include <map>
#include <set> #include <set>
...@@ -37,21 +39,21 @@ ...@@ -37,21 +39,21 @@
#include <vector> #include <vector>
namespace OpenMM { namespace OpenMM {
class CpuCustomNonbondedForce {
public:
class CpuCustomNonbondedForce {
public:
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
Constructor Constructor
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, CpuCustomNonbondedForce(ThreadPool& threads);
const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, void initialize(const Lepton::ParsedExpression& energyExpression, const Lepton::ParsedExpression& forceExpression,
const std::vector<std::string>& computedValueNames, const std::vector<Lepton::CompiledExpression> computedValueExpressions, const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions,
ThreadPool& threads); const std::vector<Lepton::ParsedExpression> energyParamDerivExpressions,
const std::vector<std::string>& computedValueNames, const std::vector<Lepton::ParsedExpression> computedValueExpressions);
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -59,7 +61,7 @@ class CpuCustomNonbondedForce { ...@@ -59,7 +61,7 @@ class CpuCustomNonbondedForce {
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
~CpuCustomNonbondedForce(); virtual ~CpuCustomNonbondedForce();
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -123,7 +125,7 @@ class CpuCustomNonbondedForce { ...@@ -123,7 +125,7 @@ class CpuCustomNonbondedForce {
void calculatePairIxn(int numberOfAtoms, float* posq, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<std::vector<double> >& atomParameters, void calculatePairIxn(int numberOfAtoms, float* posq, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<std::vector<double> >& atomParameters,
const std::map<std::string, double>& globalParameters, std::vector<AlignedArray<float> >& threadForce, const std::map<std::string, double>& globalParameters, std::vector<AlignedArray<float> >& threadForce,
bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs); bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
private: protected:
class ThreadData; class ThreadData;
bool cutoff; bool cutoff;
...@@ -137,7 +139,7 @@ private: ...@@ -137,7 +139,7 @@ private:
AlignedArray<fvec4> periodicBoxVec4; AlignedArray<fvec4> periodicBoxVec4;
double cutoffDistance, switchingDistance; double cutoffDistance, switchingDistance;
ThreadPool& threads; ThreadPool& threads;
const std::vector<std::set<int> > exclusions; std::vector<std::set<int> > exclusions;
std::vector<ThreadData*> threadData; std::vector<ThreadData*> threadData;
std::vector<std::string> paramNames, computedValueNames; std::vector<std::string> paramNames, computedValueNames;
std::vector<std::pair<int, int> > groupInteractions; std::vector<std::pair<int, int> > groupInteractions;
...@@ -167,10 +169,22 @@ private: ...@@ -167,10 +169,22 @@ private:
* @param forces force array (forces added) * @param forces force array (forces added)
* @param totalEnergy total energy * @param totalEnergy total energy
* @param boxSize the size of the periodic box * @param boxSize the size of the periodic box
* @param boxSize the inverse size of the periodic box * @param invBoxSize the inverse size of the periodic box
*/ */
void calculateOneIxn(int atom1, int atom2, ThreadData& data, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize); void calculateOneIxn(int atom1, int atom2, ThreadData& data, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**
* Calculate all the interactions for one block of atoms.
*
* @param data workspace for the current thread
* @param blockIndex the index of the atom block
* @param forces force array (forces added)
* @param totalEnergy total energy
* @param boxSize the size of the periodic box
* @param invBoxSize the inverse size of the periodic box
*/
virtual void calculateBlockIxn(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) = 0;
/** /**
* Compute the displacement and squared distance between two points, optionally using * Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions. * periodic boundary conditions.
...@@ -180,19 +194,26 @@ private: ...@@ -180,19 +194,26 @@ private:
class CpuCustomNonbondedForce::ThreadData { class CpuCustomNonbondedForce::ThreadData {
public: public:
ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, const std::vector<std::string>& parameterNames, ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledVectorExpression& energyVecExpression,
const Lepton::CompiledExpression& forceExpression, const Lepton::CompiledVectorExpression& forceVecExpression, const std::vector<std::string>& parameterNames,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, const std::vector<std::string>& computedValueNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, const std::vector<std::string>& computedValueNames,
const std::vector<Lepton::CompiledExpression> computedValueExpressions, std::vector<std::vector<double> >& atomComputedValues); const std::vector<Lepton::CompiledExpression> computedValueExpressions, std::vector<std::vector<double> >& atomComputedValues);
Lepton::CompiledExpression energyExpression; Lepton::CompiledExpression energyExpression, forceExpression;
Lepton::CompiledExpression forceExpression; Lepton::CompiledVectorExpression energyVecExpression, forceVecExpression;
std::vector<Lepton::CompiledExpression> computedValueExpressions, energyParamDerivExpressions; std::vector<Lepton::CompiledExpression> computedValueExpressions, energyParamDerivExpressions;
CompiledExpressionSet expressionSet; CompiledExpressionSet expressionSet;
std::vector<double> particleParam, computedValues; std::vector<double> particleParam, computedValues;
std::vector<float> rvec, vecParticle1Params, vecParticle2Params, vecParticle1Values, vecParticle2Values;
double r; double r;
std::vector<double> energyParamDerivs; std::vector<double> energyParamDerivs;
std::vector<std::vector<double> >& atomComputedValues; std::vector<std::vector<double> >& atomComputedValues;
}; };
/**
* This function is called to create an instance of an appropriate subclass for the current CPU.
*/
CpuCustomNonbondedForce* createCpuCustomNonbondedForce(ThreadPool& threads);
} // namespace OpenMM } // namespace OpenMM
#endif // OPENMM_CPU_CUSTOM_NONBONDED_FORCE_H__ #endif // OPENMM_CPU_CUSTOM_NONBONDED_FORCE_H__
/* Portions copyright (c) 2009-2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_CUSTOM_NONBONDED_FORCE_FVEC_H__
#define OPENMM_CPU_CUSTOM_NONBONDED_FORCE_FVEC_H__
#include "CpuCustomNonbondedForce.h"
namespace OpenMM {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
template <typename FVEC, int BLOCK_SIZE>
class CpuCustomNonbondedForceFvec : public CpuCustomNonbondedForce {
public:
CpuCustomNonbondedForceFvec(ThreadPool& threads) : CpuCustomNonbondedForce(threads) {
}
protected:
/**
* Calculate all the interactions for one block of atoms.
*
* @param data workspace for the current thread
* @param blockIndex the index of the atom block
* @param forces force array (forces added)
* @param totalEnergy total energy
* @param boxSize the size of the periodic box
* @param invBoxSize the inverse size of the periodic box
*/
void calculateBlockIxn(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
template <int PERIODIC_TYPE>
void calculateBlockIxnImpl(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
template <int PERIODIC_TYPE>
void getDeltaR(const fvec4& posI, const FVEC& x, const FVEC& y, const FVEC& z, FVEC& dx, FVEC& dy, FVEC& dz, FVEC& r2, const fvec4& boxSize, const fvec4& invBoxSize) const;
};
template<typename FVEC, int BLOCK_SIZE>
void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxn(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int32_t* blockAtom = &neighborList->getSortedAtoms()[BLOCK_SIZE*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < BLOCK_SIZE; i++) {
minx = std::min(minx, posq[4*blockAtom[i]]);
maxx = std::max(maxx, posq[4*blockAtom[i]]);
miny = std::min(miny, posq[4*blockAtom[i]+1]);
maxy = std::max(maxy, posq[4*blockAtom[i]+1]);
minz = std::min(minz, posq[4*blockAtom[i]+2]);
maxz = std::max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockIxnImpl<PeriodicPerInteraction>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockIxnImpl<PeriodicTriclinic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
}
template<typename FVEC, int BLOCK_SIZE>
template <int PERIODIC_TYPE>
void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int32_t* blockAtom = &neighborList->getSortedAtoms()[BLOCK_SIZE*blockIndex];
fvec4 blockAtomPosq[BLOCK_SIZE];
FVEC blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
FVEC blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
int numParams = paramNames.size();
int numComputed = computedValueNames.size();
for (int i = 0; i < BLOCK_SIZE; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
for (int j = 0; j < numParams; j++)
data.vecParticle1Params[j*BLOCK_SIZE+i] = atomParameters[blockAtom[i]][j];
for (int j = 0; j < numComputed; j++)
data.vecParticle1Values[j*BLOCK_SIZE+i] = atomComputedValues[j][blockAtom[i]];
}
transpose(blockAtomPosq, blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;
// Loop over neighbors for this block.
const auto& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
FVEC partialEnergy = {};
for (int i = 0; i < neighbors.size(); i++) {
// Load the next neighbor.
int atom = neighbors[i];
for (int j = 0; j < numParams; j++)
for (int k = 0; k < BLOCK_SIZE; k++)
data.vecParticle2Params[j*BLOCK_SIZE+k] = atomParameters[atom][j];
for (int j = 0; j < numComputed; j++)
for (int k = 0; k < BLOCK_SIZE; k++)
data.vecParticle2Values[j*BLOCK_SIZE+k] = atomComputedValues[j][atom];
// Compute the distances to the block atoms.
FVEC dx, dy, dz, r2;
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);
const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]);
const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask);
if (!any(include))
continue; // No interactions to compute.
// Compute the interactions.
const auto inverseR = rsqrt(r2);
const auto r = r2*inverseR;
r.store(data.rvec.data());
FVEC dEdR(data.forceVecExpression.evaluate());
FVEC energy;
if (includeEnergy)
energy = FVEC(data.energyVecExpression.evaluate());
if (useSwitch) {
const auto t = blendZero((r-switchingDistance)*invSwitchingInterval, r>switchingDistance);
const auto switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
const auto switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
dEdR = switchValue*dEdR + energy*switchDeriv;
energy *= switchValue;
}
dEdR *= inverseR;
// Accumulate forces and energies.
if (includeEnergy) {
energy = blendZero(energy, include);
partialEnergy += energy;
}
dEdR = blendZero(dEdR, include);
const auto fx = dx*dEdR;
const auto fy = dy*dEdR;
const auto fz = dz*dEdR;
blockAtomForceX -= fx;
blockAtomForceY -= fy;
blockAtomForceZ -= fz;
float* const atomForce = forces+4*atom;
const fvec4 newAtomForce = fvec4(atomForce) + reduceToVec3(fx, fy, fz);
newAtomForce.store(atomForce);
}
if (includeEnergy)
totalEnergy += reduceAdd(partialEnergy);
// Record the forces on the block atoms.
fvec4 f[BLOCK_SIZE];
transpose(blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f, f);
for (int j = 0; j < BLOCK_SIZE; j++)
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
}
template<typename FVEC, int BLOCK_SIZE>
template <int PERIODIC_TYPE>
void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::getDeltaR(const fvec4& posI, const FVEC& x, const FVEC& y, const FVEC& z, FVEC& dx, FVEC& dy, FVEC& dz, FVEC& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0];
dy = y-posI[1];
dz = z-posI[2];
if (PERIODIC_TYPE == PeriodicTriclinic) {
const auto scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
dz -= scale3*periodicBoxVectors[2][2];
const auto scale2 = floor(dy*recipBoxSize[1]+0.5f);
dx -= scale2*periodicBoxVectors[1][0];
dy -= scale2*periodicBoxVectors[1][1];
const auto scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
else if (PERIODIC_TYPE == PeriodicPerInteraction) {
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
r2 = dx*dx + dy*dy + dz*dz;
}
} // namespace OpenMM
#endif // OPENMM_CPU_CUSTOM_NONBONDED_FORCE_FVEC_H__
...@@ -207,7 +207,6 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f ...@@ -207,7 +207,6 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
// Ewald needs C6 data gathered from a table. Unused variable for non-ewald. // Ewald needs C6 data gathered from a table. Unused variable for non-ewald.
const FVEC C6s = (BLOCK_TYPE == BlockType::EWALD) ? FVEC(C6params, blockAtom) : FVEC(); const FVEC C6s = (BLOCK_TYPE == BlockType::EWALD) ? FVEC(C6params, blockAtom) : FVEC();
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance); const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance; const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;
......
...@@ -8,9 +8,11 @@ ENDFOREACH(file) ...@@ -8,9 +8,11 @@ ENDFOREACH(file)
IF(MSVC) IF(MSVC)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__") SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX2 /D__AVX2__") SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX2 /D__AVX2__")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuCustomNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
ELSEIF(X86) ELSEIF(X86)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx") SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx2 -mfma") SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx2 -mfma")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuCustomNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
ENDIF() ENDIF()
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES}) ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
......
...@@ -22,21 +22,21 @@ ...@@ -22,21 +22,21 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
#include <string.h>
#include <sstream>
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomNonbondedForce.h" #include "CpuCustomNonbondedForce.h"
#include "openmm/internal/hardware.h"
#include <cmath>
using namespace OpenMM; using namespace OpenMM;
using namespace Lepton;
using namespace std; using namespace std;
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, CpuCustomNonbondedForce::ThreadData::ThreadData(const CompiledExpression& energyExpression, const CompiledVectorExpression& energyVecExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, const CompiledExpression& forceExpression, const CompiledVectorExpression& forceVecExpression,
const vector<string>& computedValueNames, const vector<Lepton::CompiledExpression> computedValueExpressions, const vector<string>& parameterNames, const std::vector<CompiledExpression> energyParamDerivExpressions,
const vector<string>& computedValueNames, const vector<CompiledExpression> computedValueExpressions,
vector<vector<double> >& atomComputedValues) : vector<vector<double> >& atomComputedValues) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions), energyExpression(energyExpression), energyVecExpression(energyVecExpression), forceExpression(forceExpression),
forceVecExpression(forceVecExpression), energyParamDerivExpressions(energyParamDerivExpressions),
computedValueExpressions(computedValueExpressions), atomComputedValues(atomComputedValues) { computedValueExpressions(computedValueExpressions), atomComputedValues(atomComputedValues) {
// Prepare for passing variables to expressions. // Prepare for passing variables to expressions.
...@@ -44,19 +44,13 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression ...@@ -44,19 +44,13 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
variableLocations["r"] = &r; variableLocations["r"] = &r;
particleParam.resize(2*parameterNames.size()); particleParam.resize(2*parameterNames.size());
computedValues.resize(2*computedValueNames.size()); computedValues.resize(2*computedValueNames.size());
for (int i = 0; i < (int) parameterNames.size(); i++) { for (int i = 0; i < parameterNames.size(); i++) {
for (int j = 0; j < 2; j++) { variableLocations[parameterNames[i]+"1"] = &particleParam[i*2];
stringstream name; variableLocations[parameterNames[i]+"2"] = &particleParam[i*2+1];
name << parameterNames[i] << (j+1);
variableLocations[name.str()] = &particleParam[i*2+j];
}
} }
for (int i = 0; i < (int) computedValueNames.size(); i++) { for (int i = 0; i < computedValueNames.size(); i++) {
for (int j = 0; j < 2; j++) { variableLocations[computedValueNames[i]+"1"] = &computedValues[i*2];
stringstream name; variableLocations[computedValueNames[i]+"2"] = &computedValues[i*2+1];
name << computedValueNames[i] << (j+1);
variableLocations[name.str()] = &computedValues[i*2+j];
}
} }
energyParamDerivs.resize(energyParamDerivExpressions.size()); energyParamDerivs.resize(energyParamDerivExpressions.size());
this->energyExpression.setVariableLocations(variableLocations); this->energyExpression.setVariableLocations(variableLocations);
...@@ -68,6 +62,27 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression ...@@ -68,6 +62,27 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
expressionSet.registerExpression(expression); expressionSet.registerExpression(expression);
} }
// Prepare for passing variables to vectorized expressions.
map<string, float*> vecVariableLocations;
int blockSize = getVectorWidth();
rvec.resize(blockSize);
vecParticle1Params.resize(blockSize*parameterNames.size());
vecParticle2Params.resize(blockSize*parameterNames.size());
vecParticle1Values.resize(blockSize*computedValueNames.size());
vecParticle2Values.resize(blockSize*computedValueNames.size());
vecVariableLocations["r"] = rvec.data();
for (int i = 0; i < parameterNames.size(); i++) {
vecVariableLocations[parameterNames[i]+"1"] = &vecParticle1Params[i*blockSize];
vecVariableLocations[parameterNames[i]+"2"] = &vecParticle2Params[i*blockSize];
}
for (int i = 0; i < computedValueNames.size(); i++) {
vecVariableLocations[computedValueNames[i]+"1"] = &vecParticle1Values[i*blockSize];
vecVariableLocations[computedValueNames[i]+"2"] = &vecParticle2Values[i*blockSize];
}
this->energyVecExpression.setVariableLocations(vecVariableLocations);
this->forceVecExpression.setVariableLocations(vecVariableLocations);
// Prepare for passing variables to the computed value expressions. // Prepare for passing variables to the computed value expressions.
map<string, double*> valueVariableLocations; map<string, double*> valueVariableLocations;
...@@ -79,14 +94,28 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression ...@@ -79,14 +94,28 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
} }
} }
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression, CpuCustomNonbondedForce::CpuCustomNonbondedForce(ThreadPool& threads) : cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), threads(threads) {
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions, }
const vector<Lepton::CompiledExpression> energyParamDerivExpressions, const vector<string>& computedValueNames,
const vector<Lepton::CompiledExpression> computedValueExpressions, ThreadPool& threads) : void CpuCustomNonbondedForce::initialize(const ParsedExpression& energyExpression,
cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions), const ParsedExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
computedValueNames(computedValueNames), threads(threads) { const vector<ParsedExpression> energyParamDerivExpressions, const vector<string>& computedValueNames,
const vector<ParsedExpression> computedValueExpressions) {
this->paramNames = parameterNames;
this->exclusions = exclusions;
this->computedValueNames = computedValueNames;
CompiledExpression compiledEnergyExpression = energyExpression.createCompiledExpression();
CompiledExpression compiledForceExpression = forceExpression.createCompiledExpression();
CompiledVectorExpression energyVecExpression = energyExpression.createCompiledVectorExpression(getVectorWidth());
CompiledVectorExpression forceVecExpression = forceExpression.createCompiledVectorExpression(getVectorWidth());
vector<CompiledExpression> compiledDerivExpressions, compiledValueExpressions;
for (auto& exp : energyParamDerivExpressions)
compiledDerivExpressions.push_back(exp.createCompiledExpression());
for (auto& exp : computedValueExpressions)
compiledValueExpressions.push_back(exp.createCompiledExpression());
for (int i = 0; i < threads.getNumThreads(); i++) for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions, computedValueNames, computedValueExpressions, atomComputedValues)); threadData.push_back(new ThreadData(compiledEnergyExpression, energyVecExpression, compiledForceExpression, forceVecExpression, parameterNames,
compiledDerivExpressions, computedValueNames, compiledValueExpressions, atomComputedValues));
} }
CpuCustomNonbondedForce::~CpuCustomNonbondedForce() { CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
...@@ -186,9 +215,27 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v ...@@ -186,9 +215,27 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int threadIndex) { void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
int numThreads = threads.getNumThreads(); int numThreads = threads.getNumThreads();
int blockSize = getVectorWidth();
ThreadData& data = *threadData[threadIndex]; ThreadData& data = *threadData[threadIndex];
for (auto& param : *globalParameters) for (auto& param : *globalParameters) {
data.expressionSet.setVariable(data.expressionSet.getVariableIndex(param.first), param.second); data.expressionSet.setVariable(data.expressionSet.getVariableIndex(param.first), param.second);
try {
float* p = data.energyVecExpression.getVariablePointer(param.first);
for (int i = 0; i < blockSize; i++)
p[i] = param.second;
}
catch (...) {
// The expression doesn't use this parameter.
}
try {
float* p = data.forceVecExpression.getVariablePointer(param.first);
for (int i = 0; i < blockSize; i++)
p[i] = param.second;
}
catch (...) {
// The expression doesn't use this parameter.
}
}
// Process computed values for this thread's subset of interactions. // Process computed values for this thread's subset of interactions.
...@@ -244,20 +291,24 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread ...@@ -244,20 +291,24 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex]; const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex); const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex); const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) { if (data.energyParamDerivs.size() == 0)
int first = neighbors[i]; calculateBlockIxn(data, blockIndex, forces, energy, boxSize, invBoxSize);
for (int j = 0; j < paramNames.size(); j++) else {
data.particleParam[j*2] = atomParameters[first][j]; for (int i = 0; i < (int) neighbors.size(); i++) {
for (int j = 0; j < computedValueNames.size(); j++) int first = neighbors[i];
data.computedValues[j*2] = atomComputedValues[j][first]; for (int j = 0; j < paramNames.size(); j++)
for (int k = 0; k < blockSize; k++) { data.particleParam[j*2] = atomParameters[first][j];
if ((exclusions[i] & (1<<k)) == 0) { for (int j = 0; j < computedValueNames.size(); j++)
int second = blockAtom[k]; data.computedValues[j*2] = atomComputedValues[j][first];
for (int j = 0; j < paramNames.size(); j++) for (int k = 0; k < blockSize; k++) {
data.particleParam[j*2+1] = atomParameters[second][j]; if ((exclusions[i] & (1<<k)) == 0) {
for (int j = 0; j < computedValueNames.size(); j++) int second = blockAtom[k];
data.computedValues[j*2+1] = atomComputedValues[j][second]; for (int j = 0; j < paramNames.size(); j++)
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize); data.particleParam[j*2+1] = atomParameters[second][j];
for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2+1] = atomComputedValues[j][second];
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
}
} }
} }
} }
......
/* Portions copyright (c) 2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuCustomNonbondedForceFvec.h"
#include "openmm/OpenMMException.h"
#ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec8, 8>(threads);
}
#else
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
}
#endif
/* Portions copyright (c) 2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuCustomNonbondedForceFvec.h"
#include "openmm/internal/hardware.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads);
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads);
OpenMM::CpuCustomNonbondedForce* OpenMM::createCpuCustomNonbondedForce(OpenMM::ThreadPool& threads) {
if (isAvxSupported())
return createCpuCustomNonbondedForceAvx(threads);
else
return createCpuCustomNonbondedForceVec4(threads);
}
/* Portions copyright (c) 2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuCustomNonbondedForceFvec.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec4, 4>(threads);
}
...@@ -921,9 +921,8 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF ...@@ -921,9 +921,8 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
// Parse the various expressions used to calculate the force. // Parse the various expressions used to calculate the force.
Lepton::ParsedExpression expression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize(); Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::CompiledExpression energyExpression = expression.createCompiledExpression(); Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r");
Lepton::CompiledExpression forceExpression = expression.differentiate("r").createCompiledExpression();
for (int i = 0; i < force.getNumPerParticleParameters(); i++) for (int i = 0; i < force.getNumPerParticleParameters(); i++)
parameterNames.push_back(force.getPerParticleParameterName(i)); parameterNames.push_back(force.getPerParticleParameterName(i));
for (int i = 0; i < force.getNumGlobalParameters(); i++) { for (int i = 0; i < force.getNumGlobalParameters(); i++) {
...@@ -940,25 +939,25 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF ...@@ -940,25 +939,25 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
} }
particleVariables.insert(globalParameterNames.begin(), globalParameterNames.end()); particleVariables.insert(globalParameterNames.begin(), globalParameterNames.end());
pairVariables.insert(globalParameterNames.begin(), globalParameterNames.end()); pairVariables.insert(globalParameterNames.begin(), globalParameterNames.end());
vector<Lepton::CompiledExpression> computedValueExpressions, energyParamDerivExpressions; vector<Lepton::ParsedExpression> computedValueExpressions, energyParamDerivExpressions;
for (int i = 0; i < force.getNumComputedValues(); i++) { for (int i = 0; i < force.getNumComputedValues(); i++) {
string name, exp; string name, exp;
force.getComputedValueParameters(i, name, exp); force.getComputedValueParameters(i, name, exp);
Lepton::ParsedExpression parsed = Lepton::Parser::parse(exp, functions); Lepton::ParsedExpression parsed = Lepton::Parser::parse(exp, functions);
validateVariables(parsed.getRootNode(), particleVariables); validateVariables(parsed.getRootNode(), particleVariables);
computedValueNames.push_back(name); computedValueNames.push_back(name);
computedValueExpressions.push_back(parsed.createCompiledExpression()); computedValueExpressions.push_back(parsed);
} }
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) { for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string param = force.getEnergyParameterDerivativeName(i); string param = force.getEnergyParameterDerivativeName(i);
energyParamDerivNames.push_back(param); energyParamDerivNames.push_back(param);
energyParamDerivExpressions.push_back(expression.differentiate(param).createCompiledExpression()); energyParamDerivExpressions.push_back(energyExpression.differentiate(param));
} }
for (auto& name : computedValueNames) { for (auto& name : computedValueNames) {
pairVariables.insert(name+"1"); pairVariables.insert(name+"1");
pairVariables.insert(name+"2"); pairVariables.insert(name+"2");
} }
validateVariables(expression.getRootNode(), pairVariables); validateVariables(energyExpression.getRootNode(), pairVariables);
// Delete the custom functions. // Delete the custom functions.
...@@ -967,8 +966,9 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF ...@@ -967,8 +966,9 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
// Create the object that computes the interaction. // Create the object that computes the interaction.
nonbonded = new CpuCustomNonbondedForce(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions, nonbonded = createCpuCustomNonbondedForce(data.threads);
computedValueNames, computedValueExpressions, data.threads); nonbonded->initialize(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions,
computedValueNames, computedValueExpressions);
if (interactionGroups.size() > 0) if (interactionGroups.size() > 0)
nonbonded->setInteractionGroups(interactionGroups); nonbonded->setInteractionGroups(interactionGroups);
} }
......
...@@ -26,29 +26,13 @@ ...@@ -26,29 +26,13 @@
#include "openmm/OpenMMException.h" #include "openmm/OpenMMException.h"
#ifdef __AVX__ #ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h" #include "openmm/internal/vectorizeAvx.h"
bool isAvxSupported() {
// Make sure the CPU supports AVX.
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 28)) != 0);
}
return false;
}
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() {
return new OpenMM::CpuNonbondedForceFvec<fvec8>(); return new OpenMM::CpuNonbondedForceFvec<fvec8>();
} }
#else #else
bool isAvxSupported() {
return false;
}
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() { OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support"); throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
} }
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios. /* Portions copyright (c) 2006-2022 Stanford University and Simbios.
* Contributors: Daniel Towner * Contributors: Daniel Towner, Peter Eastman
* *
* Permission is hereby granted, free of charge, to any person obtaining * Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the * a copy of this software and associated documentation files (the
...@@ -23,12 +23,12 @@ ...@@ -23,12 +23,12 @@
*/ */
#include "CpuNonbondedForceFvec.h" #include "CpuNonbondedForceFvec.h"
#include "openmm/internal/hardware.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4(); OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx(); OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2(); OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2();
bool isAvxSupported();
bool isAvx2Supported(); bool isAvx2Supported();
#include <iostream> #include <iostream>
...@@ -41,10 +41,3 @@ OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec() { ...@@ -41,10 +41,3 @@ OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec() {
else else
return createCpuNonbondedForceVec4(); return createCpuNonbondedForceVec4();
} }
int getVecBlockSize() {
if (isAvx2Supported() || isAvxSupported())
return 8;
else
return 4;
}
...@@ -165,14 +165,9 @@ CpuPlatform::PlatformData::~PlatformData() { ...@@ -165,14 +165,9 @@ CpuPlatform::PlatformData::~PlatformData() {
delete neighborList; delete neighborList;
} }
/**
* Return how much vectorisation is supported for host platform.
*/
int getVecBlockSize();
void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const vector<set<int> >& exclusionList) { void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const vector<set<int> >& exclusionList) {
if (neighborList == NULL) if (neighborList == NULL)
neighborList = new CpuNeighborList(getVecBlockSize()); neighborList = new CpuNeighborList(getVectorWidth());
if (cutoffDistance > cutoff) if (cutoffDistance > cutoff)
cutoff = cutoffDistance; cutoff = cutoffDistance;
if (cutoffDistance+padding > paddedCutoff) if (cutoffDistance+padding > paddedCutoff)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment