"plugins/amoeba/vscode:/vscode.git/clone" did not exist on "8ea429504207523c64e6b82e41fdf90d4ffe55e4"
Commit a8e9c99a authored by Peter Eastman's avatar Peter Eastman
Browse files

Finished implementing cutoffs and periodic boundary conditions

parent 6b0b0a26
......@@ -29,8 +29,10 @@
#include "OpenCLForceInfo.h"
#include "OpenCLIntegrationUtilities.h"
#include "OpenCLNonbondedUtilities.h"
#include "hilbert.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include <cmath>
#include <fstream>
#include <iostream>
#include <sstream>
......@@ -38,7 +40,7 @@
using namespace OpenMM;
using namespace std;
OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), stepCount(0) {
OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), stepCount(0), computeForceCount(0) {
try {
context = cl::Context(CL_DEVICE_TYPE_ALL);
vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
......@@ -71,6 +73,7 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
nonbonded = new OpenCLNonbondedUtilities(*this);
posq = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "posq", true);
velm = new OpenCLArray<mm_float4>(*this, paddedNumAtoms, "velm", true);
posCellOffsets.resize(paddedNumAtoms, (mm_int4) {0, 0, 0, 0});
}
catch (cl::Error err) {
std::stringstream str;
......@@ -120,6 +123,7 @@ void OpenCLContext::initialize(const System& system) {
for (int i = 0; i < paddedNumAtoms; ++i)
(*atomIndex)[i] = i;
atomIndex->upload();
findMoleculeGroups(system);
integration = new OpenCLIntegrationUtilities(*this, system);
nonbonded->initialize(system);
}
......@@ -206,3 +210,284 @@ void OpenCLContext::reduceBuffer(OpenCLArray<mm_float4>& array, int numBuffers)
reduceFloat4Kernel.setArg<cl_int>(2, numBuffers);
executeKernel(reduceFloat4Kernel, bufferSize);
}
void OpenCLContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds) {
// Recursively tag atoms as belonging to a particular molecule.
atomMolecule[atom] = molecule;
for (int i = 0; i < (int) atomBonds[atom].size(); i++)
if (atomMolecule[atomBonds[atom][i]] == -1)
tagAtomsInMolecule(atomBonds[atom][i], molecule, atomMolecule, atomBonds);
}
struct OpenCLContext::Molecule {
vector<int> atoms;
vector<int> constraints;
vector<vector<int> > groups;
};
void OpenCLContext::findMoleculeGroups(const System& system) {
// First make a list of every other atom to which each atom is connect by a constraint or force group.
vector<vector<int> > atomBonds(system.getNumParticles());
for (int i = 0; i < system.getNumConstraints(); i++) {
int particle1, particle2;
double distance;
system.getConstraintParameters(i, particle1, particle2, distance);
atomBonds[particle1].push_back(particle2);
atomBonds[particle2].push_back(particle1);
}
for (int i = 0; i < forces.size(); i++) {
for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
vector<int> particles;
forces[i]->getParticlesInGroup(j, particles);
for (int k = 0; k < particles.size(); k++)
for (int m = 0; m < particles.size(); m++)
if (k != m)
atomBonds[particles[k]].push_back(particles[m]);
}
}
// Now tag atoms by which molecule they belong to.
vector<int> atomMolecule(numAtoms, -1);
int numMolecules = 0;
for (int i = 0; i < numAtoms; i++)
if (atomMolecule[i] == -1)
tagAtomsInMolecule(i, numMolecules++, atomMolecule, atomBonds);
vector<vector<int> > atomIndices(numMolecules);
for (int i = 0; i < numAtoms; i++)
atomIndices[atomMolecule[i]].push_back(i);
// Construct a description of each molecule.
vector<Molecule> molecules(numMolecules);
for (int i = 0; i < numMolecules; i++) {
molecules[i].atoms = atomIndices[i];
molecules[i].groups.resize(forces.size());
}
for (int i = 0; i < system.getNumConstraints(); i++) {
int particle1, particle2;
double distance;
system.getConstraintParameters(i, particle1, particle2, distance);
molecules[atomMolecule[particle1]].constraints.push_back(i);
}
for (int i = 0; i < forces.size(); i++)
for (int j = 0; j < forces[i]->getNumParticleGroups(); j++) {
vector<int> particles;
forces[i]->getParticlesInGroup(j, particles);
molecules[atomMolecule[particles[0]]].groups[i].push_back(j);
}
// Sort them into groups of identical molecules.
vector<Molecule> uniqueMolecules;
vector<vector<int> > moleculeInstances;
for (int molIndex = 0; molIndex < (int) molecules.size(); molIndex++) {
Molecule& mol = molecules[molIndex];
// See if it is identical to another molecule.
bool isNew = true;
for (int j = 0; j < (int) uniqueMolecules.size() && isNew; j++) {
Molecule& mol2 = uniqueMolecules[j];
bool identical = (mol.atoms.size() == mol2.atoms.size() && mol.constraints.size() == mol2.constraints.size());
// See if the atoms are identical.
int atomOffset = mol2.atoms[0]-mol.atoms[0];
for (int i = 0; i < (int) mol.atoms.size() && identical; i++) {
if (mol.atoms[i] != mol2.atoms[i]-atomOffset || system.getParticleMass(mol.atoms[i]) != system.getParticleMass(mol2.atoms[i]))
identical = false;
for (int k = 0; k < forces.size(); k++)
if (!forces[k]->areParticlesIdentical(mol.atoms[i], mol2.atoms[i]))
identical = false;
}
// See if the constraints are identical.
for (int i = 0; i < (int) mol.constraints.size() && identical; i++) {
int c1particle1, c1particle2, c2particle1, c2particle2;
double distance1, distance2;
system.getConstraintParameters(mol.constraints[i], c1particle1, c1particle2, distance1);
system.getConstraintParameters(mol2.constraints[i], c2particle1, c2particle2, distance2);
if (c1particle1 != c2particle1-atomOffset || c1particle2 != c2particle2 || distance1 != distance2)
identical = false;
}
// See if the force groups are identical.
for (int i = 0; i < forces.size() && identical; i++) {
if (mol.groups[i].size() != mol2.groups[i].size())
identical = false;
for (int k = 0; k < mol.groups[i].size() && identical; k++)
if (!forces[i]->areGroupsIdentical(mol.groups[i][k], mol2.groups[i][k]))
identical = false;
}
if (identical) {
moleculeInstances[j].push_back(mol.atoms[0]);
isNew = false;
}
}
if (isNew) {
uniqueMolecules.push_back(mol);
moleculeInstances.push_back(vector<int>());
moleculeInstances[moleculeInstances.size()-1].push_back(mol.atoms[0]);
}
}
moleculeGroups.resize(moleculeInstances.size());
for (int i = 0; i < (int) moleculeInstances.size(); i++)
{
moleculeGroups[i].instances = moleculeInstances[i];
vector<int>& atoms = uniqueMolecules[i].atoms;
moleculeGroups[i].atoms.resize(atoms.size());
for (int j = 0; j < (int) atoms.size(); j++)
moleculeGroups[i].atoms[j] = atoms[j]-atoms[0];
}
}
void OpenCLContext::reorderAtoms() {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return;
// Find the range of positions and the number of bins along each axis.
posq->download();
velm->download();
mm_float4 periodicBoxSize = nonbonded->getPeriodicBoxSize();
float minx = posq->get(0).x, maxx = posq->get(0).x;
float miny = posq->get(0).y, maxy = posq->get(0).y;
float minz = posq->get(0).z, maxz = posq->get(0).z;
if (nonbonded->getUsePeriodic()) {
minx = miny = minz = 0.0;
maxx = periodicBoxSize.x;
maxy = periodicBoxSize.y;
maxz = periodicBoxSize.z;
}
else {
for (int i = 1; i < numAtoms; i++) {
minx = min(minx, posq->get(i).x);
maxx = max(maxx, posq->get(i).x);
miny = min(miny, posq->get(i).y);
maxy = max(maxy, posq->get(i).y);
minz = min(minz, posq->get(i).z);
maxz = max(maxz, posq->get(i).z);
}
}
// Loop over each group of identical molecules and reorder them.
vector<int> originalIndex(numAtoms);
vector<mm_float4> newPosq(numAtoms);
vector<mm_float4> newVelm(numAtoms);
vector<mm_int4> newCellOffsets(numAtoms);
for (int group = 0; group < (int) moleculeGroups.size(); group++) {
// Find the center of each molecule.
MoleculeGroup& mol = moleculeGroups[group];
int numMolecules = mol.instances.size();
vector<int>& atoms = mol.atoms;
vector<mm_float4> molPos(numMolecules);
for (int i = 0; i < numMolecules; i++) {
molPos[i].x = 0.0f;
molPos[i].y = 0.0f;
molPos[i].z = 0.0f;
for (int j = 0; j < (int)atoms.size(); j++) {
int atom = atoms[j]+mol.instances[i];
molPos[i].x += posq->get(atom).x;
molPos[i].y += posq->get(atom).y;
molPos[i].z += posq->get(atom).z;
}
molPos[i].x /= atoms.size();
molPos[i].y /= atoms.size();
molPos[i].z /= atoms.size();
}
if (nonbonded->getUsePeriodic()) {
// Move each molecule position into the same box.
for (int i = 0; i < numMolecules; i++) {
int xcell = (int) floor(molPos[i].x/periodicBoxSize.x);
int ycell = (int) floor(molPos[i].y/periodicBoxSize.y);
int zcell = (int) floor(molPos[i].z/periodicBoxSize.z);
float dx = xcell*periodicBoxSize.x;
float dy = ycell*periodicBoxSize.y;
float dz = zcell*periodicBoxSize.z;
if (dx != 0.0f || dy != 0.0f || dz != 0.0f) {
molPos[i].x -= dx;
molPos[i].y -= dy;
molPos[i].z -= dz;
for (int j = 0; j < (int) atoms.size(); j++) {
int atom = atoms[j]+mol.instances[i];
mm_float4 p = posq->get(atom);
p.x -= dx;
p.y -= dy;
p.z -= dz;
posq->set(atom, p);
posCellOffsets[atom].x -= xcell;
posCellOffsets[atom].y -= ycell;
posCellOffsets[atom].z -= zcell;
}
}
}
}
// Select a bin for each molecule, then sort them by bin.
bool useHilbert = (numMolecules > 5000 || atoms.size() > 8); // For small systems, a simple zigzag curve works better than a Hilbert curve.
float binWidth;
if (useHilbert)
binWidth = (float)(max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
else
binWidth = (float)(0.2*nonbonded->getCutoffDistance());
int xbins = 1 + (int) ((maxx-minx)/binWidth);
int ybins = 1 + (int) ((maxy-miny)/binWidth);
vector<pair<int, int> > molBins(numMolecules);
bitmask_t coords[3];
for (int i = 0; i < numMolecules; i++) {
int x = (int) ((molPos[i].x-minx)/binWidth);
int y = (int) ((molPos[i].y-miny)/binWidth);
int z = (int) ((molPos[i].z-minz)/binWidth);
int bin;
if (useHilbert) {
coords[0] = x;
coords[1] = y;
coords[2] = z;
bin = (int) hilbert_c2i(3, 8, coords);
}
else {
int yodd = y&1;
int zodd = z&1;
bin = z*xbins*ybins;
bin += (zodd ? ybins-y : y)*xbins;
bin += (yodd ? xbins-x : x);
}
molBins[i] = pair<int, int>(bin, i);
}
sort(molBins.begin(), molBins.end());
// Reorder the atoms.
for (int i = 0; i < numMolecules; i++) {
for (int j = 0; j < (int)atoms.size(); j++) {
int oldIndex = mol.instances[molBins[i].second]+atoms[j];
int newIndex = mol.instances[i]+atoms[j];
originalIndex[newIndex] = atomIndex->get(oldIndex);
newPosq[newIndex] = posq->get(oldIndex);
newVelm[newIndex] = velm->get(oldIndex);
newCellOffsets[newIndex] = posCellOffsets[oldIndex];
}
}
}
// Update the streams.
for (int i = 0; i < numAtoms; i++) {
posq->set(i, newPosq[i]);
velm->set(i, newVelm[i]);
atomIndex->set(i, originalIndex[i]);
posCellOffsets[i] = newCellOffsets[i];
}
posq->upload();
velm->upload();
atomIndex->upload();
}
......@@ -133,6 +133,12 @@ public:
OpenCLArray<cl_int>& getAtomIndex() {
return *atomIndex;
}
/**
* Get the number of cells by which the positions are offset.
*/
std::vector<mm_int4>& getPosCellOffsets() {
return posCellOffsets;
}
/**
* Load OpenCL source code from a file in the kernels directory.
*/
......@@ -200,7 +206,19 @@ public:
* Set the number of integration steps that have been taken.
*/
void setStepCount(int steps) {
stepCount = steps;;
stepCount = steps;
}
/**
* Get the number of times forces or energy has been computed.
*/
int getComputeForceCount() {
return computeForceCount;
}
/**
* Set the number of times forces or energy has been computed.
*/
void setComputeForceCount(int count) {
computeForceCount = count;
}
/**
* Get the number of atoms.
......@@ -245,9 +263,19 @@ public:
OpenCLNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded;
}
/**
* Reorder the internal arrays of atoms to try to keep spatially contiguous atoms close
* together in the arrays.
*/
void reorderAtoms();
private:
struct Molecule;
struct MoleculeGroup;
void findMoleculeGroups(const System& system);
static void tagAtomsInMolecule(int atom, int molecule, std::vector<int>& atomMolecule, std::vector<std::vector<int> >& atomBonds);
double time;
int stepCount;
int computeForceCount;
int numAtoms;
int paddedNumAtoms;
int numAtomBlocks;
......@@ -261,6 +289,8 @@ private:
cl::Kernel clearBufferKernel;
cl::Kernel reduceFloat4Kernel;
std::vector<OpenCLForceInfo*> forces;
std::vector<MoleculeGroup> moleculeGroups;
std::vector<mm_int4> posCellOffsets;
OpenCLArray<mm_float4>* posq;
OpenCLArray<mm_float4>* velm;
OpenCLArray<mm_float4>* force;
......@@ -271,6 +301,11 @@ private:
OpenCLNonbondedUtilities* nonbonded;
};
struct OpenCLContext::MoleculeGroup {
std::vector<int> atoms;
std::vector<int> instances;
};
} // namespace OpenMM
#endif /*OPENMM_OPENCLCONTEXT_H_*/
......@@ -46,6 +46,9 @@ void OpenCLCalcForcesAndEnergyKernel::initialize(const System& system) {
}
void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context) {
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0)
cl.reorderAtoms();
cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearBuffer(cl.getForceBuffers());
cl.getNonbondedUtilities().prepareInteractions();
}
......@@ -56,6 +59,9 @@ void OpenCLCalcForcesAndEnergyKernel::finishForceComputation(ContextImpl& contex
}
void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& context) {
if (cl.getNonbondedUtilities().getUseCutoff() && cl.getComputeForceCount()%100 == 0)
cl.reorderAtoms();
cl.setComputeForceCount(cl.getComputeForceCount()+1);
cl.clearBuffer(cl.getEnergyBuffer());
cl.getNonbondedUtilities().prepareInteractions();
}
......@@ -86,11 +92,11 @@ void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, std::vector
OpenCLArray<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
mm_float4 periodicBoxSize = cl.getNonbondedUtilities().getPeriodicBoxSize();
for (int i = 0; i < numParticles; ++i) {
mm_float4 pos = posq[i];
// int3 offset = gpu->posCellOffsets[i];
// positions[order[i]] = Vec3(pos.x-offset.x*gpu->sim.periodicBoxSizeX, pos.y-offset.y*gpu->sim.periodicBoxSizeY, pos.z-offset.z*gpu->sim.periodicBoxSizeZ);
positions[order[i]] = Vec3(pos.x, pos.y, pos.z);
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x-offset.x*periodicBoxSize.x, pos.y-offset.y*periodicBoxSize.y, pos.z-offset.z*periodicBoxSize.z);
}
}
......@@ -106,8 +112,8 @@ void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const std::
pos.z = p[2];
}
posq.upload();
// for (int i = 0; i < gpu->posCellOffsets.size(); i++)
// gpu->posCellOffsets[i] = make_int3(0, 0, 0);
for (int i = 0; i < cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = (mm_int4) {0, 0, 0, 0};
}
void OpenCLUpdateStateDataKernel::getVelocities(ContextImpl& context, std::vector<Vec3>& velocities) {
......
......@@ -298,7 +298,7 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
findInteractionsWithinBlocksKernel.setArg<cl_float>(0, cutoff*cutoff);
findInteractionsWithinBlocksKernel.setArg<mm_float4>(1, periodicBoxSize);
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(2, context.getPosq().getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, tiles->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(3, interactingTiles->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(4, blockCenter->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(5, blockBoundingBox->getDeviceBuffer());
findInteractionsWithinBlocksKernel.setArg<cl::Buffer>(6, interactionFlags->getDeviceBuffer());
......@@ -311,24 +311,28 @@ void OpenCLNonbondedUtilities::computeInteractions() {
if (hasComputedInteractions)
return;
hasComputedInteractions = true;
forceKernel.setArg<cl_int>(0, tiles->getSize());
forceKernel.setArg<cl_int>(1, context.getPaddedNumAtoms());
forceKernel.setArg<cl::Buffer>(2, context.getForceBuffers().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(3, context.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(4, context.getPosq().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(5, tiles->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(6, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(7, exclusionIndex->getDeviceBuffer());
forceKernel.setArg(8, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
forceKernel.setArg(9, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
forceKernel.setArg<cl_int>(0, context.getPaddedNumAtoms());
forceKernel.setArg<cl::Buffer>(1, context.getForceBuffers().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(2, context.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(3, context.getPosq().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(4, exclusions->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(5, exclusionIndex->getDeviceBuffer());
forceKernel.setArg(6, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
forceKernel.setArg(7, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
int paramBase = 10;
if (useCutoff) {
paramBase = 14;
forceKernel.setArg<cl_float>(10, cutoff*cutoff);
forceKernel.setArg<mm_float4>(11, periodicBoxSize);
forceKernel.setArg<cl::Buffer>(12, interactionFlags->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(8, interactingTiles->getDeviceBuffer());
forceKernel.setArg<cl_float>(9, cutoff*cutoff);
forceKernel.setArg<mm_float4>(10, periodicBoxSize);
forceKernel.setArg<cl::Buffer>(11, interactionFlags->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(12, interactionCount->getDeviceBuffer());
forceKernel.setArg(13, OpenCLContext::ThreadBlockSize*sizeof(cl_float4), NULL);
}
else {
forceKernel.setArg<cl::Buffer>(8, tiles->getDeviceBuffer());
forceKernel.setArg<cl_uint>(9, tiles->getSize());
}
for (int i = 0; i < (int) parameters.size(); i++) {
forceKernel.setArg<cl::Buffer>(i*2+paramBase, *parameters[i].buffer);
forceKernel.setArg(i*2+paramBase+1, OpenCLContext::ThreadBlockSize*parameters[i].size, NULL);
......
......@@ -74,6 +74,24 @@ public:
int getNumForceBuffers() {
return numForceBuffers;
}
/**
* Get whether a cutoff is being used.
*/
bool getUseCutoff() {
return useCutoff;
}
/**
* Get whether periodic boundary conditions are being used.
*/
bool getUsePeriodic() {
return usePeriodic;
}
/**
* Get the cutoff distance.
*/
double getCutoffDistance() {
return cutoff;
}
/**
* Get the periodic box size.
*/
......
This diff is collapsed.
/* C header file for Hilbert curve functions */
#if !defined(_hilbert_h_)
#define _hilbert_h_
#ifdef __cplusplus
extern "C" {
#endif
#ifdef _MSC_VER
/* define the bitmask_t type as an integer of sufficient size */
typedef unsigned long long bitmask_t;
/* define the halfmask_t type as an integer of 1/2 the size of bitmask_t */
typedef unsigned int halfmask_t;
#else
#include <stdint.h>
/* define the bitmask_t type as an integer of sufficient size */
typedef uint64_t bitmask_t;
/* define the halfmask_t type as an integer of 1/2 the size of bitmask_t */
typedef uint32_t halfmask_t;
#endif
/*****************************************************************
* hilbert_i2c
*
* Convert an index into a Hilbert curve to a set of coordinates.
* Inputs:
* nDims: Number of coordinate axes.
* nBits: Number of bits per axis.
* index: The index, contains nDims*nBits bits (so nDims*nBits must be <= 8*sizeof(bitmask_t)).
* Outputs:
* coord: The list of nDims coordinates, each with nBits bits.
* Assumptions:
* nDims*nBits <= (sizeof index) * (bits_per_byte)
*/
void hilbert_i2c(unsigned nDims, unsigned nBits, bitmask_t index, bitmask_t coord[]);
/*****************************************************************
* hilbert_c2i
*
* Convert coordinates of a point on a Hilbert curve to its index.
* Inputs:
* nDims: Number of coordinates.
* nBits: Number of bits/coordinate.
* coord: Array of n nBits-bit coordinates.
* Outputs:
* index: Output index value. nDims*nBits bits.
* Assumptions:
* nDims*nBits <= (sizeof bitmask_t) * (bits_per_byte)
*/
bitmask_t hilbert_c2i(unsigned nDims, unsigned nBits, bitmask_t const coord[]);
/*****************************************************************
* hilbert_cmp, hilbert_ieee_cmp
*
* Determine which of two points lies further along the Hilbert curve
* Inputs:
* nDims: Number of coordinates.
* nBytes: Number of bytes of storage/coordinate (hilbert_cmp only)
* nBits: Number of bits/coordinate. (hilbert_cmp only)
* coord1: Array of nDims nBytes-byte coordinates (or doubles for ieee_cmp).
* coord2: Array of nDims nBytes-byte coordinates (or doubles for ieee_cmp).
* Return value:
* -1, 0, or 1 according to whether
coord1<coord2, coord1==coord2, coord1>coord2
* Assumptions:
* nBits <= (sizeof bitmask_t) * (bits_per_byte)
*/
int hilbert_cmp(unsigned nDims, unsigned nBytes, unsigned nBits, void const* coord1, void const* coord2);
int hilbert_ieee_cmp(unsigned nDims, double const* coord1, double const* coord2);
/*****************************************************************
* hilbert_box_vtx
*
* Determine the first or last vertex of a box to lie on a Hilbert curve
* Inputs:
* nDims: Number of coordinates.
* nBytes: Number of bytes/coordinate.
* nBits: Number of bits/coordinate. (hilbert_cmp only)
* findMin: Is it the least vertex sought?
* coord1: Array of nDims nBytes-byte coordinates - one corner of box
* coord2: Array of nDims nBytes-byte coordinates - opposite corner
* Output:
* c1 and c2 modified to refer to selected corner
* value returned is log2 of size of largest power-of-two-aligned box that
* contains the selected corner and no other corners
* Assumptions:
* nBits <= (sizeof bitmask_t) * (bits_per_byte)
*/
unsigned
hilbert_box_vtx(unsigned nDims, unsigned nBytes, unsigned nBits,
int findMin, void* c1, void* c2);
unsigned
hilbert_ieee_box_vtx(unsigned nDims,
int findMin, double* c1, double* c2);
/*****************************************************************
* hilbert_box_pt
*
* Determine the first or last point of a box to lie on a Hilbert curve
* Inputs:
* nDims: Number of coordinates.
* nBytes: Number of bytes/coordinate.
* nBits: Number of bits/coordinate.
* findMin: Is it the least vertex sought?
* coord1: Array of nDims nBytes-byte coordinates - one corner of box
* coord2: Array of nDims nBytes-byte coordinates - opposite corner
* Output:
* c1 and c2 modified to refer to least point
* Assumptions:
* nBits <= (sizeof bitmask_t) * (bits_per_byte)
*/
unsigned
hilbert_box_pt(unsigned nDims, unsigned nBytes, unsigned nBits,
int findMin, void* coord1, void* coord2);
unsigned
hilbert_ieee_box_pt(unsigned nDims,
int findMin, double* c1, double* c2);
/*****************************************************************
* hilbert_nextinbox
*
* Determine the first point of a box after a given point to lie on a Hilbert curve
* Inputs:
* nDims: Number of coordinates.
* nBytes: Number of bytes/coordinate.
* nBits: Number of bits/coordinate.
* findPrev: Is the previous point sought?
* coord1: Array of nDims nBytes-byte coordinates - one corner of box
* coord2: Array of nDims nBytes-byte coordinates - opposite corner
* point: Array of nDims nBytes-byte coordinates - lower bound on point returned
*
* Output:
if returns 1:
* c1 and c2 modified to refer to least point after "point" in box
else returns 0:
arguments unchanged; "point" is beyond the last point of the box
* Assumptions:
* nBits <= (sizeof bitmask_t) * (bits_per_byte)
*/
int
hilbert_nextinbox(unsigned nDims, unsigned nBytes, unsigned nBits,
int findPrev, void* coord1, void* coord2,
void const* point);
/*****************************************************************
* hilbert_incr
*
* Advance from one point to its successor on a Hilbert curve
* Inputs:
* nDims: Number of coordinates.
* nBits: Number of bits/coordinate.
* coord: Array of nDims nBits-bit coordinates.
* Output:
* coord: Next point on Hilbert curve
* Assumptions:
* nBits <= (sizeof bitmask_t) * (bits_per_byte)
*/
void
hilbert_incr(unsigned nDims, unsigned nBits, bitmask_t coord[]);
#ifdef __cplusplus
}
#endif
#endif /* _hilbert_h_ */
const int BlockSize = 32;
const int TileSize = 32;
/**
* Find a bounding box for the atoms in each block.
*/
__kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, __global float4* posq, __global float4* blockCenter, __global float4* blockBoundingBox) {
int index = get_global_id(0);
int base = index*BlockSize;
int base = index*TileSize;
while (base < numAtoms) {
float4 pos = posq[base];
#ifdef USE_PERIODIC
......@@ -16,7 +16,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, __global flo
#endif
float4 minPos = pos;
float4 maxPos = pos;
int last = min(base+BlockSize, numAtoms);
int last = min(base+TileSize, numAtoms);
for (int i = base+1; i < last; i++) {
pos = posq[i];
#ifdef USE_PERIODIC
......@@ -30,7 +30,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, __global flo
blockBoundingBox[index] = 0.5f*(maxPos-minPos);
blockCenter[index] = 0.5f*(maxPos+minPos);
index += get_global_size(0);
base = index*BlockSize;
base = index*TileSize;
}
}
......@@ -72,37 +72,34 @@ __kernel void findBlocksWithInteractions(int numTiles, float cutoffSquared, floa
*/
__kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicBoxSize, __global float4* posq, __global unsigned int* tiles, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local unsigned int* flags) {
unsigned int totalWarps = get_global_size(0)/BlockSize;
unsigned int warp = get_global_id(0)/BlockSize;
unsigned int totalWarps = get_global_size(0)/TileSize;
unsigned int warp = get_global_id(0)/TileSize;
unsigned int numTiles = interactionCount[0];
unsigned int pos = warp*numTiles/totalWarps;
unsigned int end = (warp+1)*numTiles/totalWarps;
unsigned int index = get_local_id(0) & (BlockSize - 1);
unsigned int index = get_local_id(0) & (TileSize - 1);
unsigned int lasty = 0xFFFFFFFF;
float4 apos;
while (pos < end)
{
// Extract cell coordinates from appropriate work unit
while (pos < end) {
// Extract the coordinates of this tile
unsigned int x = tiles[pos];
unsigned int y = ((x >> 2) & 0x7fff);
bool bExclusionFlag = (x & 0x1);
bool hasExclusions = (x & 0x1);
x = (x >> 17);
if (x == y || bExclusionFlag)
{
// Assume this block will be dense.
if (x == y || hasExclusions) {
// Assume this tile will be dense.
if (index == 0)
interactionFlags[pos] = 0xFFFFFFFF;
}
else
{
else {
// Load the bounding box for x and the atom positions for y.
float4 center = blockCenter[x];
float4 boxSize = blockBoundingBox[x];
if (y != lasty)
apos = posq[(y*BlockSize)+index];
apos = posq[y*TileSize+index];
// Find the distance of the atom from the bounding box.
......@@ -142,8 +139,7 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
flags[thread] += flags[thread+8];
barrier(CLK_LOCAL_MEM_FENCE);
#endif
if (index == 0)
{
if (index == 0) {
unsigned int allFlags = flags[thread] + flags[thread+16];
// Count how many flags are set, and based on that decide whether to compute all interactions
......
......@@ -4,13 +4,17 @@ const unsigned int TileSize = 32;
* Compute nonbonded interactions.
*/
__kernel void computeNonbonded(int numTiles, int paddedNumAtoms,
__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global unsigned int* tiles,
__global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force
__kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers, __global float* energyBuffer, __global float4* posq,
__global unsigned int* exclusions, __global unsigned int* exclusionIndices, __local float4* local_posq, __local float4* local_force, __global unsigned int* tiles,
#ifdef USE_CUTOFF
, float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __local float4* tempBuffer
float cutoffSquared, float4 periodicBoxSize, __global unsigned int* interactionFlags, __global unsigned int* interactionCount, __local float4* tempBuffer
#else
unsigned int numTiles
#endif
PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
unsigned int numTiles = interactionCount[0];
#endif
unsigned int totalWarps = get_global_size(0)/TileSize;
unsigned int warp = get_global_id(0)/TileSize;
unsigned int pos = warp*numTiles/totalWarps;
......
......@@ -567,14 +567,6 @@ void testBlockInteractions(bool periodic) {
dy -= (float)(floor(0.5+dy/boxSize)*boxSize);
dz -= (float)(floor(0.5+dz/boxSize)*boxSize);
}
if (dx*dx+dy*dy+dz*dz < cutoff*cutoff) {
dx = pos2.x-center1.x;
dy = pos2.y-center1.y;
dz = pos2.z-center1.z;
dx = max(0.0f, abs(dx)-gridSize1.x);
dy = max(0.0f, abs(dy)-gridSize1.y);
dz = max(0.0f, abs(dz)-gridSize1.z);
}
ASSERT(dx*dx+dy*dy+dz*dz > cutoff*cutoff);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment