"vscode:/vscode.git/clone" did not exist on "983513e6635ad904fbc06d7710b4d3248880abe9"
Unverified Commit c8981916 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Vectorized CpuCustomNonbondedForce (#3568)

* Began vectorizing CustomNonbondedForce

* Refactored CpuCustomNonbondedForce to support multiple vector sizes

* AVX implementation of CpuCustomNonbondedForce

* Fixed compilation errors
parent fe21d5ee
......@@ -580,7 +580,7 @@ void CompiledVectorExpression::generateJitCode() {
CodeHolder code;
code.init(runtime.environment());
x86::Compiler c(&code);
FuncNode* funcNode = c.addFunc(FuncSignatureT<double>());
FuncNode* funcNode = c.addFunc(FuncSignatureT<void>());
funcNode->frame().setAvxEnabled();
vector<x86::Ymm> workspaceVar(workspace.size()/width);
for (int i = 0; i < (int) workspaceVar.size(); i++)
......
......@@ -36,9 +36,6 @@
* This file defines a collection of functions for querying the specific hardware being used.
*/
/**
* Get the number of CPU cores available.
*/
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <dlfcn.h>
......@@ -56,6 +53,9 @@
#endif
#endif
/**
* Get the number of CPU cores available.
*/
static int getNumProcessors() {
#ifdef __APPLE__
int ncpu;
......@@ -95,7 +95,7 @@ static int getNumProcessors() {
#else
#if !defined(__ANDROID__) && !defined(__PNACL__) && !defined(__PPC__) \
&& !defined(__ARM__) && !defined(__ARM64__)
static void cpuid(int cpuInfo[4], int infoType){
static void cpuid(int cpuInfo[4], int infoType) {
#ifdef __LP64__
__asm__ __volatile__ (
"cpuid":
......@@ -119,7 +119,33 @@ static int getNumProcessors() {
);
#endif
}
#endif
#else
static void cpuid(int cpuInfo[4], int infoType) {
cpuInfo[0] = cpuInfo[1] = cpuInfo[2] = 0;
}
#endif
#endif
/**
* Get whether this is an x86 CPU that supports AVX.
*/
static bool isAvxSupported() {
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 28)) != 0);
}
return false;
}
/**
* Get the maximum supported size for vectors in multiples of four bytes. This
* is the number of int or float values that can be contained in a vector.
*/
static int getVectorWidth() {
if (isAvxSupported())
return 8;
return 4;
}
#endif // OPENMM_HARDWARE_H_
......@@ -30,6 +30,8 @@
#include "openmm/internal/CompiledExpressionSet.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include "lepton/CompiledVectorExpression.h"
#include "lepton/ParsedExpression.h"
#include <atomic>
#include <map>
#include <set>
......@@ -37,21 +39,21 @@
#include <vector>
namespace OpenMM {
class CpuCustomNonbondedForce {
public:
class CpuCustomNonbondedForce {
public:
/**---------------------------------------------------------------------------------------
Constructor
--------------------------------------------------------------------------------------- */
CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions,
const std::vector<std::string>& computedValueNames, const std::vector<Lepton::CompiledExpression> computedValueExpressions,
ThreadPool& threads);
CpuCustomNonbondedForce(ThreadPool& threads);
void initialize(const Lepton::ParsedExpression& energyExpression, const Lepton::ParsedExpression& forceExpression,
const std::vector<std::string>& parameterNames, const std::vector<std::set<int> >& exclusions,
const std::vector<Lepton::ParsedExpression> energyParamDerivExpressions,
const std::vector<std::string>& computedValueNames, const std::vector<Lepton::ParsedExpression> computedValueExpressions);
/**---------------------------------------------------------------------------------------
......@@ -59,7 +61,7 @@ class CpuCustomNonbondedForce {
--------------------------------------------------------------------------------------- */
~CpuCustomNonbondedForce();
virtual ~CpuCustomNonbondedForce();
/**---------------------------------------------------------------------------------------
......@@ -123,7 +125,7 @@ class CpuCustomNonbondedForce {
void calculatePairIxn(int numberOfAtoms, float* posq, std::vector<OpenMM::Vec3>& atomCoordinates, std::vector<std::vector<double> >& atomParameters,
const std::map<std::string, double>& globalParameters, std::vector<AlignedArray<float> >& threadForce,
bool includeForce, bool includeEnergy, double& totalEnergy, double* energyParamDerivs);
private:
protected:
class ThreadData;
bool cutoff;
......@@ -137,7 +139,7 @@ private:
AlignedArray<fvec4> periodicBoxVec4;
double cutoffDistance, switchingDistance;
ThreadPool& threads;
const std::vector<std::set<int> > exclusions;
std::vector<std::set<int> > exclusions;
std::vector<ThreadData*> threadData;
std::vector<std::string> paramNames, computedValueNames;
std::vector<std::pair<int, int> > groupInteractions;
......@@ -167,10 +169,22 @@ private:
* @param forces force array (forces added)
* @param totalEnergy total energy
* @param boxSize the size of the periodic box
* @param boxSize the inverse size of the periodic box
* @param invBoxSize the inverse size of the periodic box
*/
void calculateOneIxn(int atom1, int atom2, ThreadData& data, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**
* Calculate all the interactions for one block of atoms.
*
* @param data workspace for the current thread
* @param blockIndex the index of the atom block
* @param forces force array (forces added)
* @param totalEnergy total energy
* @param boxSize the size of the periodic box
* @param invBoxSize the inverse size of the periodic box
*/
virtual void calculateBlockIxn(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) = 0;
/**
* Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions.
......@@ -180,19 +194,26 @@ private:
class CpuCustomNonbondedForce::ThreadData {
public:
ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression, const std::vector<std::string>& parameterNames,
ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledVectorExpression& energyVecExpression,
const Lepton::CompiledExpression& forceExpression, const Lepton::CompiledVectorExpression& forceVecExpression, const std::vector<std::string>& parameterNames,
const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions, const std::vector<std::string>& computedValueNames,
const std::vector<Lepton::CompiledExpression> computedValueExpressions, std::vector<std::vector<double> >& atomComputedValues);
Lepton::CompiledExpression energyExpression;
Lepton::CompiledExpression forceExpression;
Lepton::CompiledExpression energyExpression, forceExpression;
Lepton::CompiledVectorExpression energyVecExpression, forceVecExpression;
std::vector<Lepton::CompiledExpression> computedValueExpressions, energyParamDerivExpressions;
CompiledExpressionSet expressionSet;
std::vector<double> particleParam, computedValues;
std::vector<float> rvec, vecParticle1Params, vecParticle2Params, vecParticle1Values, vecParticle2Values;
double r;
std::vector<double> energyParamDerivs;
std::vector<std::vector<double> >& atomComputedValues;
};
/**
* This function is called to create an instance of an appropriate subclass for the current CPU.
*/
CpuCustomNonbondedForce* createCpuCustomNonbondedForce(ThreadPool& threads);
} // namespace OpenMM
#endif // OPENMM_CPU_CUSTOM_NONBONDED_FORCE_H__
/* Portions copyright (c) 2009-2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_CUSTOM_NONBONDED_FORCE_FVEC_H__
#define OPENMM_CPU_CUSTOM_NONBONDED_FORCE_FVEC_H__
#include "CpuCustomNonbondedForce.h"
namespace OpenMM {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
template <typename FVEC, int BLOCK_SIZE>
class CpuCustomNonbondedForceFvec : public CpuCustomNonbondedForce {
public:
CpuCustomNonbondedForceFvec(ThreadPool& threads) : CpuCustomNonbondedForce(threads) {
}
protected:
/**
* Calculate all the interactions for one block of atoms.
*
* @param data workspace for the current thread
* @param blockIndex the index of the atom block
* @param forces force array (forces added)
* @param totalEnergy total energy
* @param boxSize the size of the periodic box
* @param invBoxSize the inverse size of the periodic box
*/
void calculateBlockIxn(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
template <int PERIODIC_TYPE>
void calculateBlockIxnImpl(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter);
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
template <int PERIODIC_TYPE>
void getDeltaR(const fvec4& posI, const FVEC& x, const FVEC& y, const FVEC& z, FVEC& dx, FVEC& dy, FVEC& dz, FVEC& r2, const fvec4& boxSize, const fvec4& invBoxSize) const;
};
template<typename FVEC, int BLOCK_SIZE>
void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxn(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
periodicType = NoPeriodic;
blockCenter = 0.0f;
}
else {
const int32_t* blockAtom = &neighborList->getSortedAtoms()[BLOCK_SIZE*blockIndex];
float minx, maxx, miny, maxy, minz, maxz;
minx = maxx = posq[4*blockAtom[0]];
miny = maxy = posq[4*blockAtom[0]+1];
minz = maxz = posq[4*blockAtom[0]+2];
for (int i = 1; i < BLOCK_SIZE; i++) {
minx = std::min(minx, posq[4*blockAtom[i]]);
maxx = std::max(maxx, posq[4*blockAtom[i]]);
miny = std::min(miny, posq[4*blockAtom[i]+1]);
maxy = std::max(maxy, posq[4*blockAtom[i]+1]);
minz = std::min(minz, posq[4*blockAtom[i]+2]);
maxz = std::max(maxz, posq[4*blockAtom[i]+2]);
}
blockCenter = fvec4(0.5f*(minx+maxx), 0.5f*(miny+maxy), 0.5f*(minz+maxz), 0.0f);
if (!(minx < cutoffDistance || miny < cutoffDistance || minz < cutoffDistance ||
maxx > boxSize[0]-cutoffDistance || maxy > boxSize[1]-cutoffDistance || maxz > boxSize[2]-cutoffDistance))
periodicType = NoPeriodic;
else if (triclinic)
periodicType = PeriodicTriclinic;
else if (0.5f*(boxSize[0]-(maxx-minx)) >= cutoffDistance &&
0.5f*(boxSize[1]-(maxy-miny)) >= cutoffDistance &&
0.5f*(boxSize[2]-(maxz-minz)) >= cutoffDistance)
periodicType = PeriodicPerAtom;
else
periodicType = PeriodicPerInteraction;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if (periodicType == NoPeriodic)
calculateBlockIxnImpl<NoPeriodic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerAtom)
calculateBlockIxnImpl<PeriodicPerAtom>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicPerInteraction)
calculateBlockIxnImpl<PeriodicPerInteraction>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
else if (periodicType == PeriodicTriclinic)
calculateBlockIxnImpl<PeriodicTriclinic>(data, blockIndex, forces, totalEnergy, boxSize, invBoxSize, blockCenter);
}
template<typename FVEC, int BLOCK_SIZE>
template <int PERIODIC_TYPE>
void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::calculateBlockIxnImpl(ThreadData& data, int blockIndex, float* forces, double& totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int32_t* blockAtom = &neighborList->getSortedAtoms()[BLOCK_SIZE*blockIndex];
fvec4 blockAtomPosq[BLOCK_SIZE];
FVEC blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
FVEC blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge;
int numParams = paramNames.size();
int numComputed = computedValueNames.size();
for (int i = 0; i < BLOCK_SIZE; i++) {
blockAtomPosq[i] = fvec4(posq+4*blockAtom[i]);
if (PERIODIC_TYPE == PeriodicPerAtom)
blockAtomPosq[i] -= floor((blockAtomPosq[i]-blockCenter)*invBoxSize+0.5f)*boxSize;
for (int j = 0; j < numParams; j++)
data.vecParticle1Params[j*BLOCK_SIZE+i] = atomParameters[blockAtom[i]][j];
for (int j = 0; j < numComputed; j++)
data.vecParticle1Values[j*BLOCK_SIZE+i] = atomComputedValues[j][blockAtom[i]];
}
transpose(blockAtomPosq, blockAtomX, blockAtomY, blockAtomZ, blockAtomCharge);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;
// Loop over neighbors for this block.
const auto& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
FVEC partialEnergy = {};
for (int i = 0; i < neighbors.size(); i++) {
// Load the next neighbor.
int atom = neighbors[i];
for (int j = 0; j < numParams; j++)
for (int k = 0; k < BLOCK_SIZE; k++)
data.vecParticle2Params[j*BLOCK_SIZE+k] = atomParameters[atom][j];
for (int j = 0; j < numComputed; j++)
for (int k = 0; k < BLOCK_SIZE; k++)
data.vecParticle2Values[j*BLOCK_SIZE+k] = atomComputedValues[j][atom];
// Compute the distances to the block atoms.
FVEC dx, dy, dz, r2;
fvec4 atomPos(posq+4*atom);
if (PERIODIC_TYPE == PeriodicPerAtom)
atomPos -= floor((atomPos-blockCenter)*invBoxSize+0.5f)*boxSize;
getDeltaR<PERIODIC_TYPE>(atomPos, blockAtomX, blockAtomY, blockAtomZ, dx, dy, dz, r2, boxSize, invBoxSize);
const auto exclNotMask = FVEC::expandBitsToMask(~exclusions[i]);
const auto include = blendZero(r2 < cutoffDistanceSquared, exclNotMask);
if (!any(include))
continue; // No interactions to compute.
// Compute the interactions.
const auto inverseR = rsqrt(r2);
const auto r = r2*inverseR;
r.store(data.rvec.data());
FVEC dEdR(data.forceVecExpression.evaluate());
FVEC energy;
if (includeEnergy)
energy = FVEC(data.energyVecExpression.evaluate());
if (useSwitch) {
const auto t = blendZero((r-switchingDistance)*invSwitchingInterval, r>switchingDistance);
const auto switchValue = 1+t*t*t*(-10.0f+t*(15.0f-t*6.0f));
const auto switchDeriv = t*t*(-30.0f+t*(60.0f-t*30.0f))*invSwitchingInterval;
dEdR = switchValue*dEdR + energy*switchDeriv;
energy *= switchValue;
}
dEdR *= inverseR;
// Accumulate forces and energies.
if (includeEnergy) {
energy = blendZero(energy, include);
partialEnergy += energy;
}
dEdR = blendZero(dEdR, include);
const auto fx = dx*dEdR;
const auto fy = dy*dEdR;
const auto fz = dz*dEdR;
blockAtomForceX -= fx;
blockAtomForceY -= fy;
blockAtomForceZ -= fz;
float* const atomForce = forces+4*atom;
const fvec4 newAtomForce = fvec4(atomForce) + reduceToVec3(fx, fy, fz);
newAtomForce.store(atomForce);
}
if (includeEnergy)
totalEnergy += reduceAdd(partialEnergy);
// Record the forces on the block atoms.
fvec4 f[BLOCK_SIZE];
transpose(blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f, f);
for (int j = 0; j < BLOCK_SIZE; j++)
(fvec4(forces+4*blockAtom[j])+f[j]).store(forces+4*blockAtom[j]);
}
template<typename FVEC, int BLOCK_SIZE>
template <int PERIODIC_TYPE>
void CpuCustomNonbondedForceFvec<FVEC, BLOCK_SIZE>::getDeltaR(const fvec4& posI, const FVEC& x, const FVEC& y, const FVEC& z, FVEC& dx, FVEC& dy, FVEC& dz, FVEC& r2, const fvec4& boxSize, const fvec4& invBoxSize) const {
dx = x-posI[0];
dy = y-posI[1];
dz = z-posI[2];
if (PERIODIC_TYPE == PeriodicTriclinic) {
const auto scale3 = floor(dz*recipBoxSize[2]+0.5f);
dx -= scale3*periodicBoxVectors[2][0];
dy -= scale3*periodicBoxVectors[2][1];
dz -= scale3*periodicBoxVectors[2][2];
const auto scale2 = floor(dy*recipBoxSize[1]+0.5f);
dx -= scale2*periodicBoxVectors[1][0];
dy -= scale2*periodicBoxVectors[1][1];
const auto scale1 = floor(dx*recipBoxSize[0]+0.5f);
dx -= scale1*periodicBoxVectors[0][0];
}
else if (PERIODIC_TYPE == PeriodicPerInteraction) {
dx -= round(dx*invBoxSize[0])*boxSize[0];
dy -= round(dy*invBoxSize[1])*boxSize[1];
dz -= round(dz*invBoxSize[2])*boxSize[2];
}
r2 = dx*dx + dy*dy + dz*dz;
}
} // namespace OpenMM
#endif // OPENMM_CPU_CUSTOM_NONBONDED_FORCE_FVEC_H__
......@@ -207,7 +207,6 @@ void CpuNonbondedForceFvec<FVEC>::calculateBlockIxnImpl(int blockIndex, float* f
// Ewald needs C6 data gathered from a table. Unused variable for non-ewald.
const FVEC C6s = (BLOCK_TYPE == BlockType::EWALD) ? FVEC(C6params, blockAtom) : FVEC();
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
const FVEC cutoffDistanceSquared = cutoffDistance * cutoffDistance;
......
......@@ -8,9 +8,11 @@ ENDFOREACH(file)
IF(MSVC)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX2 /D__AVX2__")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuCustomNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} /arch:AVX /D__AVX__")
ELSEIF(X86)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuNonbondedForceAvx2.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx2 -mfma")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/platforms/cpu/src/CpuCustomNonbondedForceAvx.cpp PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
ENDIF()
ADD_LIBRARY(${SHARED_TARGET} SHARED ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
......
......@@ -22,21 +22,21 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include <string.h>
#include <sstream>
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomNonbondedForce.h"
#include "openmm/internal/hardware.h"
#include <cmath>
using namespace OpenMM;
using namespace Lepton;
using namespace std;
CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression& energyExpression, const Lepton::CompiledExpression& forceExpression,
const vector<string>& parameterNames, const std::vector<Lepton::CompiledExpression> energyParamDerivExpressions,
const vector<string>& computedValueNames, const vector<Lepton::CompiledExpression> computedValueExpressions,
CpuCustomNonbondedForce::ThreadData::ThreadData(const CompiledExpression& energyExpression, const CompiledVectorExpression& energyVecExpression,
const CompiledExpression& forceExpression, const CompiledVectorExpression& forceVecExpression,
const vector<string>& parameterNames, const std::vector<CompiledExpression> energyParamDerivExpressions,
const vector<string>& computedValueNames, const vector<CompiledExpression> computedValueExpressions,
vector<vector<double> >& atomComputedValues) :
energyExpression(energyExpression), forceExpression(forceExpression), energyParamDerivExpressions(energyParamDerivExpressions),
energyExpression(energyExpression), energyVecExpression(energyVecExpression), forceExpression(forceExpression),
forceVecExpression(forceVecExpression), energyParamDerivExpressions(energyParamDerivExpressions),
computedValueExpressions(computedValueExpressions), atomComputedValues(atomComputedValues) {
// Prepare for passing variables to expressions.
......@@ -44,19 +44,13 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
variableLocations["r"] = &r;
particleParam.resize(2*parameterNames.size());
computedValues.resize(2*computedValueNames.size());
for (int i = 0; i < (int) parameterNames.size(); i++) {
for (int j = 0; j < 2; j++) {
stringstream name;
name << parameterNames[i] << (j+1);
variableLocations[name.str()] = &particleParam[i*2+j];
}
for (int i = 0; i < parameterNames.size(); i++) {
variableLocations[parameterNames[i]+"1"] = &particleParam[i*2];
variableLocations[parameterNames[i]+"2"] = &particleParam[i*2+1];
}
for (int i = 0; i < (int) computedValueNames.size(); i++) {
for (int j = 0; j < 2; j++) {
stringstream name;
name << computedValueNames[i] << (j+1);
variableLocations[name.str()] = &computedValues[i*2+j];
}
for (int i = 0; i < computedValueNames.size(); i++) {
variableLocations[computedValueNames[i]+"1"] = &computedValues[i*2];
variableLocations[computedValueNames[i]+"2"] = &computedValues[i*2+1];
}
energyParamDerivs.resize(energyParamDerivExpressions.size());
this->energyExpression.setVariableLocations(variableLocations);
......@@ -68,6 +62,27 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
expressionSet.registerExpression(expression);
}
// Prepare for passing variables to vectorized expressions.
map<string, float*> vecVariableLocations;
int blockSize = getVectorWidth();
rvec.resize(blockSize);
vecParticle1Params.resize(blockSize*parameterNames.size());
vecParticle2Params.resize(blockSize*parameterNames.size());
vecParticle1Values.resize(blockSize*computedValueNames.size());
vecParticle2Values.resize(blockSize*computedValueNames.size());
vecVariableLocations["r"] = rvec.data();
for (int i = 0; i < parameterNames.size(); i++) {
vecVariableLocations[parameterNames[i]+"1"] = &vecParticle1Params[i*blockSize];
vecVariableLocations[parameterNames[i]+"2"] = &vecParticle2Params[i*blockSize];
}
for (int i = 0; i < computedValueNames.size(); i++) {
vecVariableLocations[computedValueNames[i]+"1"] = &vecParticle1Values[i*blockSize];
vecVariableLocations[computedValueNames[i]+"2"] = &vecParticle2Values[i*blockSize];
}
this->energyVecExpression.setVariableLocations(vecVariableLocations);
this->forceVecExpression.setVariableLocations(vecVariableLocations);
// Prepare for passing variables to the computed value expressions.
map<string, double*> valueVariableLocations;
......@@ -79,14 +94,28 @@ CpuCustomNonbondedForce::ThreadData::ThreadData(const Lepton::CompiledExpression
}
}
CpuCustomNonbondedForce::CpuCustomNonbondedForce(const Lepton::CompiledExpression& energyExpression,
const Lepton::CompiledExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const vector<Lepton::CompiledExpression> energyParamDerivExpressions, const vector<string>& computedValueNames,
const vector<Lepton::CompiledExpression> computedValueExpressions, ThreadPool& threads) :
cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), paramNames(parameterNames), exclusions(exclusions),
computedValueNames(computedValueNames), threads(threads) {
CpuCustomNonbondedForce::CpuCustomNonbondedForce(ThreadPool& threads) : cutoff(false), useSwitch(false), periodic(false), useInteractionGroups(false), threads(threads) {
}
void CpuCustomNonbondedForce::initialize(const ParsedExpression& energyExpression,
const ParsedExpression& forceExpression, const vector<string>& parameterNames, const vector<set<int> >& exclusions,
const vector<ParsedExpression> energyParamDerivExpressions, const vector<string>& computedValueNames,
const vector<ParsedExpression> computedValueExpressions) {
this->paramNames = parameterNames;
this->exclusions = exclusions;
this->computedValueNames = computedValueNames;
CompiledExpression compiledEnergyExpression = energyExpression.createCompiledExpression();
CompiledExpression compiledForceExpression = forceExpression.createCompiledExpression();
CompiledVectorExpression energyVecExpression = energyExpression.createCompiledVectorExpression(getVectorWidth());
CompiledVectorExpression forceVecExpression = forceExpression.createCompiledVectorExpression(getVectorWidth());
vector<CompiledExpression> compiledDerivExpressions, compiledValueExpressions;
for (auto& exp : energyParamDerivExpressions)
compiledDerivExpressions.push_back(exp.createCompiledExpression());
for (auto& exp : computedValueExpressions)
compiledValueExpressions.push_back(exp.createCompiledExpression());
for (int i = 0; i < threads.getNumThreads(); i++)
threadData.push_back(new ThreadData(energyExpression, forceExpression, parameterNames, energyParamDerivExpressions, computedValueNames, computedValueExpressions, atomComputedValues));
threadData.push_back(new ThreadData(compiledEnergyExpression, energyVecExpression, compiledForceExpression, forceVecExpression, parameterNames,
compiledDerivExpressions, computedValueNames, compiledValueExpressions, atomComputedValues));
}
CpuCustomNonbondedForce::~CpuCustomNonbondedForce() {
......@@ -186,9 +215,27 @@ void CpuCustomNonbondedForce::calculatePairIxn(int numberOfAtoms, float* posq, v
void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
int numThreads = threads.getNumThreads();
int blockSize = getVectorWidth();
ThreadData& data = *threadData[threadIndex];
for (auto& param : *globalParameters)
for (auto& param : *globalParameters) {
data.expressionSet.setVariable(data.expressionSet.getVariableIndex(param.first), param.second);
try {
float* p = data.energyVecExpression.getVariablePointer(param.first);
for (int i = 0; i < blockSize; i++)
p[i] = param.second;
}
catch (...) {
// The expression doesn't use this parameter.
}
try {
float* p = data.forceVecExpression.getVariablePointer(param.first);
for (int i = 0; i < blockSize; i++)
p[i] = param.second;
}
catch (...) {
// The expression doesn't use this parameter.
}
}
// Process computed values for this thread's subset of interactions.
......@@ -244,20 +291,24 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
const int32_t* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const auto& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2] = atomParameters[first][j];
for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2] = atomComputedValues[j][first];
for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k];
for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2+1] = atomParameters[second][j];
for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2+1] = atomComputedValues[j][second];
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
if (data.energyParamDerivs.size() == 0)
calculateBlockIxn(data, blockIndex, forces, energy, boxSize, invBoxSize);
else {
for (int i = 0; i < (int) neighbors.size(); i++) {
int first = neighbors[i];
for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2] = atomParameters[first][j];
for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2] = atomComputedValues[j][first];
for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k];
for (int j = 0; j < paramNames.size(); j++)
data.particleParam[j*2+1] = atomParameters[second][j];
for (int j = 0; j < computedValueNames.size(); j++)
data.computedValues[j*2+1] = atomComputedValues[j][second];
calculateOneIxn(first, second, data, forces, energy, boxSize, invBoxSize);
}
}
}
}
......
/* Portions copyright (c) 2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuCustomNonbondedForceFvec.h"
#include "openmm/OpenMMException.h"
#ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec8, 8>(threads);
}
#else
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads) {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
}
#endif
/* Portions copyright (c) 2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuCustomNonbondedForceFvec.h"
#include "openmm/internal/hardware.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads);
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceAvx(OpenMM::ThreadPool& threads);
OpenMM::CpuCustomNonbondedForce* OpenMM::createCpuCustomNonbondedForce(OpenMM::ThreadPool& threads) {
if (isAvxSupported())
return createCpuCustomNonbondedForceAvx(threads);
else
return createCpuCustomNonbondedForceVec4(threads);
}
/* Portions copyright (c) 2022 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "CpuCustomNonbondedForceFvec.h"
OpenMM::CpuCustomNonbondedForce* createCpuCustomNonbondedForceVec4(OpenMM::ThreadPool& threads) {
return new OpenMM::CpuCustomNonbondedForceFvec<fvec4, 4>(threads);
}
......@@ -921,9 +921,8 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
// Parse the various expressions used to calculate the force.
Lepton::ParsedExpression expression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::CompiledExpression energyExpression = expression.createCompiledExpression();
Lepton::CompiledExpression forceExpression = expression.differentiate("r").createCompiledExpression();
Lepton::ParsedExpression energyExpression = Lepton::Parser::parse(force.getEnergyFunction(), functions).optimize();
Lepton::ParsedExpression forceExpression = energyExpression.differentiate("r");
for (int i = 0; i < force.getNumPerParticleParameters(); i++)
parameterNames.push_back(force.getPerParticleParameterName(i));
for (int i = 0; i < force.getNumGlobalParameters(); i++) {
......@@ -940,25 +939,25 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
}
particleVariables.insert(globalParameterNames.begin(), globalParameterNames.end());
pairVariables.insert(globalParameterNames.begin(), globalParameterNames.end());
vector<Lepton::CompiledExpression> computedValueExpressions, energyParamDerivExpressions;
vector<Lepton::ParsedExpression> computedValueExpressions, energyParamDerivExpressions;
for (int i = 0; i < force.getNumComputedValues(); i++) {
string name, exp;
force.getComputedValueParameters(i, name, exp);
Lepton::ParsedExpression parsed = Lepton::Parser::parse(exp, functions);
validateVariables(parsed.getRootNode(), particleVariables);
computedValueNames.push_back(name);
computedValueExpressions.push_back(parsed.createCompiledExpression());
computedValueExpressions.push_back(parsed);
}
for (int i = 0; i < force.getNumEnergyParameterDerivatives(); i++) {
string param = force.getEnergyParameterDerivativeName(i);
energyParamDerivNames.push_back(param);
energyParamDerivExpressions.push_back(expression.differentiate(param).createCompiledExpression());
energyParamDerivExpressions.push_back(energyExpression.differentiate(param));
}
for (auto& name : computedValueNames) {
pairVariables.insert(name+"1");
pairVariables.insert(name+"2");
}
validateVariables(expression.getRootNode(), pairVariables);
validateVariables(energyExpression.getRootNode(), pairVariables);
// Delete the custom functions.
......@@ -967,8 +966,9 @@ void CpuCalcCustomNonbondedForceKernel::createInteraction(const CustomNonbondedF
// Create the object that computes the interaction.
nonbonded = new CpuCustomNonbondedForce(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions,
computedValueNames, computedValueExpressions, data.threads);
nonbonded = createCpuCustomNonbondedForce(data.threads);
nonbonded->initialize(energyExpression, forceExpression, parameterNames, exclusions, energyParamDerivExpressions,
computedValueNames, computedValueExpressions);
if (interactionGroups.size() > 0)
nonbonded->setInteractionGroups(interactionGroups);
}
......
......@@ -26,29 +26,13 @@
#include "openmm/OpenMMException.h"
#ifdef __AVX__
#include "openmm/internal/vectorizeAvx.h"
bool isAvxSupported() {
// Make sure the CPU supports AVX.
int cpuInfo[4];
cpuid(cpuInfo, 0);
if (cpuInfo[0] >= 1) {
cpuid(cpuInfo, 1);
return ((cpuInfo[2] & ((int) 1 << 28)) != 0);
}
return false;
}
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() {
return new OpenMM::CpuNonbondedForceFvec<fvec8>();
}
#else
bool isAvxSupported() {
return false;
}
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx() {
throw OpenMM::OpenMMException("Internal error: OpenMM was compiled without AVX support");
}
......
/* Portions copyright (c) 2006-2015 Stanford University and Simbios.
* Contributors: Daniel Towner
/* Portions copyright (c) 2006-2022 Stanford University and Simbios.
* Contributors: Daniel Towner, Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
......@@ -23,12 +23,12 @@
*/
#include "CpuNonbondedForceFvec.h"
#include "openmm/internal/hardware.h"
OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec4();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx();
OpenMM::CpuNonbondedForce* createCpuNonbondedForceAvx2();
bool isAvxSupported();
bool isAvx2Supported();
#include <iostream>
......@@ -41,10 +41,3 @@ OpenMM::CpuNonbondedForce* createCpuNonbondedForceVec() {
else
return createCpuNonbondedForceVec4();
}
int getVecBlockSize() {
if (isAvx2Supported() || isAvxSupported())
return 8;
else
return 4;
}
......@@ -165,14 +165,9 @@ CpuPlatform::PlatformData::~PlatformData() {
delete neighborList;
}
/**
* Return how much vectorisation is supported for host platform.
*/
int getVecBlockSize();
void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, const vector<set<int> >& exclusionList) {
if (neighborList == NULL)
neighborList = new CpuNeighborList(getVecBlockSize());
neighborList = new CpuNeighborList(getVectorWidth());
if (cutoffDistance > cutoff)
cutoff = cutoffDistance;
if (cutoffDistance+padding > paddedCutoff)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment