Commit e22808f3 authored by peastman's avatar peastman
Browse files

Created AVX implementation of CPU nonbonded force

parent ddad7c6c
......@@ -132,6 +132,12 @@ public:
/**
 * Implicit conversion to the raw AVX integer register type, so that an instance
 * can be passed directly to _mm256_* intrinsics. Returns the wrapped value unchanged.
 */
operator __m256i() const {
return val;
}
/**
 * Get the lower 128 bits of this vector as a 4-element integer vector.
 * _mm256_castsi256_si128 is a reinterpreting cast that keeps only the low half of val.
 */
ivec4 lowerVec() const {
return _mm256_castsi256_si128(val);
}
/**
 * Get the upper 128 bits of this vector as a 4-element integer vector.
 * The immediate argument 1 selects the high 128-bit lane of val.
 */
ivec4 upperVec() const {
return _mm256_extractf128_si256(val, 1);
}
/**
 * Write all 256 bits of this vector to memory starting at v.
 * The unaligned store intrinsic is used, so v does not need any particular alignment,
 * but it must point to at least 32 writable bytes (eight 32-bit ints).
 */
void store(int* v) const {
_mm256_storeu_si256((__m256i*) v, val);
}
......
......@@ -45,8 +45,7 @@ class OPENMM_EXPORT_CPU CpuNeighborList {
public:
class ThreadTask;
class Voxels;
static const int BlockSize;
CpuNeighborList();
CpuNeighborList(int blockSize);
void computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const std::vector<std::set<int> >& exclusions,
const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads);
int getNumBlocks() const;
......@@ -59,6 +58,7 @@ public:
void threadComputeNeighborList(ThreadPool& threads, int threadIndex);
void runThread(int index);
private:
int blockSize;
std::vector<int> sortedAtoms;
std::vector<std::vector<int> > blockNeighbors;
std::vector<std::vector<char> > blockExclusions;
......
/* Portions copyright (c) 2006-2013 Stanford University and Simbios.
* Contributors: Pande Group
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject
* to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "AlignedArray.h"
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize8.h"
#include <set>
#include <utility>
#include <vector>
// ---------------------------------------------------------------------------------------
namespace OpenMM {
/**
 * AVX implementation of the CPU nonbonded force. The private helpers operate on
 * 8-wide float vectors (fvec8) in addition to 4-wide ones (fvec4).
 *
 * Usage, as far as the interface shows: configure the object with the setUse...()
 * and setPeriodic() methods, then call calculateDirectIxn() and/or
 * calculateReciprocalIxn() to accumulate forces and, optionally, energy.
 */
class CpuNonbondedForceVec8 {
public:
// Nested helper type; only forward declared here. Presumably the task object handed
// to the thread pool by calculateDirectIxn() -- confirm in the implementation file.
class ComputeDirectTask;
/**---------------------------------------------------------------------------------------
Constructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForceVec8();
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
@param distance the cutoff distance
@param neighbors the neighbor list to use
@param solventDielectric the dielectric constant of the bulk solvent
--------------------------------------------------------------------------------------- */
void setUseCutoff(float distance, const CpuNeighborList& neighbors, float solventDielectric);
/**---------------------------------------------------------------------------------------
Set the force to use a switching function on the Lennard-Jones interaction.
@param distance the switching distance
--------------------------------------------------------------------------------------- */
void setUseSwitchingFunction(float distance);
/**---------------------------------------------------------------------------------------
Set the force to use periodic boundary conditions. This requires that a cutoff has
already been set, and the smallest side of the periodic box is at least twice the cutoff
distance.
@param periodicBoxSize the X, Y, and Z widths of the periodic box (three floats)
--------------------------------------------------------------------------------------- */
void setPeriodic(float* periodicBoxSize);
/**---------------------------------------------------------------------------------------
Set the force to use Ewald summation.
@param alpha the Ewald separation parameter
@param kmaxx the largest wave vector in the x direction
@param kmaxy the largest wave vector in the y direction
@param kmaxz the largest wave vector in the z direction
--------------------------------------------------------------------------------------- */
void setUseEwald(float alpha, int kmaxx, int kmaxy, int kmaxz);
/**---------------------------------------------------------------------------------------
Set the force to use Particle-Mesh Ewald (PME) summation.
@param alpha the Ewald separation parameter
@param meshSize the dimensions of the mesh (three ints)
--------------------------------------------------------------------------------------- */
void setUsePME(float alpha, int meshSize[3]);
/**---------------------------------------------------------------------------------------
Calculate the Ewald interaction (the reciprocal space part, going by the method name).
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (in format needed by PME)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void calculateReciprocalIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates,
const std::vector<std::pair<float, float> >& atomParameters, const std::vector<std::set<int> >& exclusions,
std::vector<RealVec>& forces, double* totalEnergy) const;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn
@param numberOfAtoms number of atoms
@param posq atom coordinates and charges
@param atomCoordinates atom coordinates (periodic boundary conditions not applied)
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param exclusions atom exclusion indices
exclusions[atomIndex] contains the list of exclusions for that atom
@param threadForce force arrays (forces added); presumably one array per worker
thread -- confirm against the caller
@param totalEnergy total energy
@param threads the thread pool to use
--------------------------------------------------------------------------------------- */
void calculateDirectIxn(int numberOfAtoms, float* posq, const std::vector<RealVec>& atomCoordinates, const std::vector<std::pair<float, float> >& atomParameters,
const std::vector<std::set<int> >& exclusions, std::vector<AlignedArray<float> >& threadForce, double* totalEnergy, ThreadPool& threads);
/**
* This routine contains the code executed by each thread.
* @param threads the thread pool the calling thread belongs to
* @param threadIndex the index of the calling thread
*/
void threadComputeDirect(ThreadPool& threads, int threadIndex);
private:
// Configuration flags. NOTE(review): each presumably records whether the matching
// setUse...() / setPeriodic() method has been called -- confirm in the implementation.
bool cutoff;
bool useSwitch;
bool periodic;
bool ewald;
bool pme;
// NOTE(review): presumably tracks whether ewaldScaleTable matches the current
// parameters -- confirm.
bool tableIsValid;
// Non-owning pointer; presumably the neighbor list passed to setUseCutoff() -- confirm.
const CpuNeighborList* neighborList;
float periodicBoxSize[3];
float cutoffDistance, switchingDistance;
// NOTE(review): presumably the reaction field constants derived from the cutoff and
// solvent dielectric -- confirm.
float krf, crf;
float alphaEwald;
int numRx, numRy, numRz;
int meshDim[3];
// Lookup table for the Ewald/PME scale factor (see tabulateEwaldScaleFactor()), with
// its spacing and inverse spacing (by name -- confirm).
std::vector<float> ewaldScaleTable;
float ewaldDX, ewaldDXInv;
// NOTE(review): presumably one energy accumulator per worker thread -- confirm.
std::vector<double> threadEnergy;
// The following variables are used to make information accessible to the individual threads.
int numberOfAtoms;
float* posq;
RealVec const* atomCoordinates;
std::pair<float, float> const* atomParameters;
std::set<int> const* exclusions;
std::vector<AlignedArray<float> >* threadForce;
bool includeEnergy;
// Opaque pointer; NOTE(review): presumably a shared counter used to hand out work to
// threads -- its real type is only known to the implementation file.
void* atomicCounter;
static const float TWO_OVER_SQRT_PI;
static const int NUM_TABLE_POINTS;
/**---------------------------------------------------------------------------------------
Calculate LJ Coulomb pair ixn between two atoms
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param forces force array (forces added)
@param totalEnergy total energy
@param boxSize presumably the periodic box dimensions -- confirm
@param invBoxSize presumably the reciprocals of the box dimensions -- confirm
--------------------------------------------------------------------------------------- */
void calculateOneIxn(int atom1, int atom2, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
@param boxSize presumably the periodic box dimensions -- confirm
@param invBoxSize presumably the reciprocals of the box dimensions -- confirm
--------------------------------------------------------------------------------------- */
void calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block. Judging by the name, this is the
variant used when Ewald or PME is enabled -- confirm in the implementation.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
@param boxSize presumably the periodic box dimensions -- confirm
@param invBoxSize presumably the reciprocals of the box dimensions -- confirm
--------------------------------------------------------------------------------------- */
void calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize);
/**
* Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions.
* Results are written to the output references deltaR and r2.
*/
void getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& deltaR, float& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* The second set of points is passed as separate x, y, and z vectors (eight values each);
* results are written to the output references dx, dy, dz, and r2.
*/
void getDeltaR(const fvec4& posI, const fvec8& x, const fvec8& y, const fvec8& z, fvec8& dx, fvec8& dy, fvec8& dz, fvec8& r2, bool periodic, const fvec4& boxSize, const fvec4& invBoxSize) const;
/**
* Compute a fast approximation to erfc(x).
*/
static fvec8 erfcApprox(fvec8 x);
/**
* Create a lookup table for the scale factor used with Ewald and PME.
*/
void tabulateEwaldScaleFactor();
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
fvec8 ewaldScaleFunction(fvec8 x);
};
} // namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_H__
......@@ -43,8 +43,6 @@ using namespace std;
namespace OpenMM {
const int CpuNeighborList::BlockSize = 4;
class VoxelIndex
{
public:
......@@ -62,8 +60,8 @@ public:
*/
class CpuNeighborList::Voxels {
public:
Voxels(float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
Voxels(int blockSize, float vsx, float vsy, float minx, float maxx, float miny, float maxy, const float* periodicBoxSize, bool usePeriodic) :
blockSize(blockSize), voxelSizeX(vsx), voxelSizeY(vsy), minx(minx), maxx(maxx), miny(miny), maxy(maxy), periodicBoxSize(periodicBoxSize), usePeriodic(usePeriodic) {
if (usePeriodic) {
nx = (int) floorf(periodicBoxSize[0]/voxelSizeX+0.5f);
ny = (int) floorf(periodicBoxSize[1]/voxelSizeY+0.5f);
......@@ -156,7 +154,7 @@ public:
return VoxelIndex(x, y);
}
void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int> blockAtoms, const float* atomLocations) const {
void getNeighbors(vector<int>& neighbors, int blockIndex, fvec4 blockCenter, fvec4 blockWidth, const vector<int>& sortedAtoms, vector<char>& exclusions, float maxDistance, const vector<int>& blockAtoms, const float* atomLocations, const vector<VoxelIndex>& atomVoxelIndex) const {
neighbors.resize(0);
exclusions.resize(0);
fvec4 boxSize(periodicBoxSize[0], periodicBoxSize[1], periodicBoxSize[2], 0);
......@@ -175,9 +173,6 @@ public:
float centerPos[4];
blockCenter.store(centerPos);
VoxelIndex centerVoxelIndex = getVoxelIndex(centerPos);
VoxelIndex atomVoxelIndex[BlockSize];
for (int i = 0; i < (int) blockAtoms.size(); i++)
atomVoxelIndex[i] = getVoxelIndex(&atomLocations[4*blockAtoms[i]]);
int startx = centerVoxelIndex.x-dIndexX;
int starty = centerVoxelIndex.y-dIndexY;
int endx = centerVoxelIndex.x+dIndexX;
......@@ -193,7 +188,7 @@ public:
endx = min(endx, nx-1);
endy = min(endy, ny-1);
}
int lastSortedIndex = BlockSize*(blockIndex+1);
int lastSortedIndex = blockSize*(blockIndex+1);
VoxelIndex voxelIndex(0, 0);
for (int x = startx; x <= endx; ++x) {
voxelIndex.x = x;
......@@ -300,10 +295,12 @@ public:
// Add this atom to the list of neighbors.
neighbors.push_back(sortedAtoms[sortedIndex]);
if (sortedIndex < BlockSize*blockIndex)
if (sortedIndex < blockSize*blockIndex)
exclusions.push_back(0);
else
exclusions.push_back(0xF & (0xF<<(sortedIndex-BlockSize*blockIndex)));
else {
int mask = (1<<blockSize)-1;
exclusions.push_back(mask & (mask<<(sortedIndex-blockSize*blockIndex)));
}
}
}
}
......@@ -311,6 +308,7 @@ public:
}
private:
int blockSize;
float voxelSizeX, voxelSizeY;
float minx, maxx, miny, maxy;
int nx, ny;
......@@ -329,12 +327,12 @@ public:
CpuNeighborList& owner;
};
CpuNeighborList::CpuNeighborList() {
CpuNeighborList::CpuNeighborList(int blockSize) : blockSize(blockSize) {
}
void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float>& atomLocations, const vector<set<int> >& exclusions,
const float* periodicBoxSize, bool usePeriodic, float maxDistance, ThreadPool& threads) {
int numBlocks = (numAtoms+BlockSize-1)/BlockSize;
int numBlocks = (numAtoms+blockSize-1)/blockSize;
blockNeighbors.resize(numBlocks);
blockExclusions.resize(numBlocks);
sortedAtoms.resize(numAtoms);
......@@ -381,7 +379,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
edgeSizeX = 0.6f*periodicBoxSize[0]/floorf(periodicBoxSize[0]/maxDistance);
edgeSizeY = 0.6f*periodicBoxSize[1]/floorf(periodicBoxSize[1]/maxDistance);
}
Voxels voxels(edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
Voxels voxels(blockSize, edgeSizeX, edgeSizeY, minx, maxx, miny, maxy, periodicBoxSize, usePeriodic);
for (int i = 0; i < numAtoms; i++) {
int atomIndex = atomBins[i].second;
sortedAtoms[i] = atomIndex;
......@@ -397,9 +395,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
// Add padding atoms to fill up the last block.
int numPadding = numBlocks*BlockSize-numAtoms;
int numPadding = numBlocks*blockSize-numAtoms;
if (numPadding > 0) {
char mask = (0xF0 >> numPadding) & 0xF;
char mask = ((0xFFFF-(1<<blockSize)+1) >> numPadding);
for (int i = 0; i < numPadding; i++)
sortedAtoms.push_back(0);
vector<char>& exc = blockExclusions[blockExclusions.size()-1];
......@@ -409,7 +407,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
}
int CpuNeighborList::getNumBlocks() const {
return sortedAtoms.size()/BlockSize;
return sortedAtoms.size()/blockSize;
}
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
......@@ -446,14 +444,18 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int numBlocks = blockNeighbors.size();
vector<int> blockAtoms;
vector<VoxelIndex> atomVoxelIndex;
for (int i = threadIndex; i < numBlocks; i += numThreads) {
// Find the atoms in this block and compute their bounding box.
int firstIndex = BlockSize*i;
int atomsInBlock = min(BlockSize, numAtoms-firstIndex);
int firstIndex = blockSize*i;
int atomsInBlock = min(blockSize, numAtoms-firstIndex);
blockAtoms.resize(atomsInBlock);
for (int j = 0; j < atomsInBlock; j++)
atomVoxelIndex.resize(atomsInBlock);
for (int j = 0; j < atomsInBlock; j++) {
blockAtoms[j] = sortedAtoms[firstIndex+j];
atomVoxelIndex[j] = voxels->getVoxelIndex(&atomLocations[4*blockAtoms[j]]);
}
fvec4 minPos(&atomLocations[4*sortedAtoms[firstIndex]]);
fvec4 maxPos = minPos;
for (int j = 1; j < atomsInBlock; j++) {
......@@ -461,7 +463,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
minPos = min(minPos, pos);
maxPos = max(maxPos, pos);
}
voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations);
voxels->getNeighbors(blockNeighbors[i], i, (maxPos+minPos)*0.5f, (maxPos-minPos)*0.5f, sortedAtoms, blockExclusions[i], maxDistance, blockAtoms, atomLocations, atomVoxelIndex);
// Record the exclusions for this block.
......
This diff is collapsed.
......@@ -51,6 +51,7 @@ void testNeighborList(bool periodic) {
const int numParticles = 500;
const float cutoff = 2.0f;
const float boxSize[3] = {20.0f, 15.0f, 22.0f};
const int blockSize = 8;
OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt);
AlignedArray<float> positions(4*numParticles);
......@@ -66,15 +67,15 @@ void testNeighborList(bool periodic) {
}
}
ThreadPool threads;
CpuNeighborList neighborList;
CpuNeighborList neighborList(blockSize);
neighborList.computeNeighborList(numParticles, positions, exclusions, boxSize, periodic, cutoff, threads);
// Convert the neighbor list to a set for faster lookup.
set<pair<int, int> > neighbors;
for (int i = 0; i < (int) neighborList.getSortedAtoms().size(); i++) {
int blockIndex = i/CpuNeighborList::BlockSize;
int indexInBlock = i-blockIndex*CpuNeighborList::BlockSize;
int blockIndex = i/blockSize;
int indexInBlock = i-blockIndex*blockSize;
char mask = 1<<indexInBlock;
for (int j = 0; j < (int) neighborList.getBlockExclusions(blockIndex).size(); j++) {
if ((neighborList.getBlockExclusions(blockIndex)[j] & mask) == 0) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment