"openmmapi/vscode:/vscode.git/clone" did not exist on "d3d352c121dfa51aff1415d5e23dd31dfe5cc302"
Commit 2e9c418a authored by peastman's avatar peastman
Browse files

Merge branch 'master' into gayberne

parents 8f532e31 a4d327f5
/* Portions copyright (c) 2009-2014 Stanford University and Simbios.
/* Portions copyright (c) 2009-2016 Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
......@@ -190,7 +190,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
int blockIndex = gmx_atomic_fetch_add(reinterpret_cast<gmx_atomic_t*>(atomicCounter), 1);
if (blockIndex >= neighborList->getNumBlocks())
break;
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
const int blockSize = neighborList->getBlockSize();
const int* blockAtom = &neighborList->getSortedAtoms()[blockSize*blockIndex];
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
const vector<char>& exclusions = neighborList->getBlockExclusions(blockIndex);
for (int i = 0; i < (int) neighbors.size(); i++) {
......@@ -199,7 +200,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
ReferenceForce::setVariable(data.energyParticleParams[j*2], atomParameters[first][j]);
ReferenceForce::setVariable(data.forceParticleParams[j*2], atomParameters[first][j]);
}
for (int k = 0; k < 4; k++) {
for (int k = 0; k < blockSize; k++) {
if ((exclusions[i] & (1<<k)) == 0) {
int second = blockAtom[k];
for (int j = 0; j < (int) paramNames.size(); j++) {
......
......@@ -230,6 +230,7 @@ CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, con
void CpuCalcForcesAndEnergyKernel::initialize(const System& system) {
referenceKernel.getAs<ReferenceCalcForcesAndEnergyKernel>().initialize(system);
lastPositions.resize(system.getNumParticles(), Vec3(1e10, 1e10, 1e10));
}
void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
......@@ -237,11 +238,60 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
// Convert positions to single precision and clear the forces.
InitForceTask task(context.getSystem().getNumParticles(), context, data);
int numParticles = context.getSystem().getNumParticles();
InitForceTask task(numParticles, context, data);
data.threads.execute(task);
data.threads.waitForThreads();
if (!task.positionsValid)
throw OpenMMException("Particle coordinate is nan");
// Determine whether we need to recompute the neighbor list.
if (data.neighborList != NULL) {
double padding = data.paddedCutoff-data.cutoff;;
bool needRecompute = false;
double closeCutoff2 = 0.25*padding*padding;
double farCutoff2 = 0.5*padding*padding;
int maxNumMoved = numParticles/10;
vector<int> moved;
vector<RealVec>& posData = extractPositions(context);
for (int i = 0; i < numParticles; i++) {
RealVec delta = posData[i]-lastPositions[i];
double dist2 = delta.dot(delta);
if (dist2 > closeCutoff2) {
moved.push_back(i);
if (dist2 > farCutoff2 || moved.size() > maxNumMoved) {
needRecompute = true;
break;
}
}
}
if (!needRecompute && moved.size() > 0) {
// Some particles have moved further than half the padding distance. Look for pairs
// that are missing from the neighbor list.
int numMoved = moved.size();
double cutoff2 = data.cutoff*data.cutoff;
double paddedCutoff2 = data.paddedCutoff*data.paddedCutoff;
for (int i = 1; i < numMoved && !needRecompute; i++)
for (int j = 0; j < i; j++) {
RealVec delta = posData[moved[i]]-posData[moved[j]];
if (delta.dot(delta) < cutoff2) {
// These particles should interact. See if they are in the neighbor list.
RealVec oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]];
if (oldDelta.dot(oldDelta) > paddedCutoff2) {
needRecompute = true;
break;
}
}
}
}
if (needRecompute) {
data.neighborList->computeNeighborList(numParticles, data.posq, data.exclusions, extractBoxVectors(context), data.isPeriodic, data.paddedCutoff, data.threads);
lastPositions = posData;
}
}
}
double CpuCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups, bool& valid) {
......@@ -283,6 +333,7 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har
angleParamArray[i][1] = (RealOpenMM) k;
}
bondForce.initialize(system.getNumParticles(), numAngles, 3, angleIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions();
}
double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -290,6 +341,8 @@ double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool inclu
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
ReferenceAngleBondIxn angleBond;
if (usePeriodic)
angleBond.setPeriodic(extractBoxVectors(context));
bondForce.calculateForce(posData, angleParamArray, forceData, includeEnergy ? &energy : NULL, angleBond);
return energy;
}
......@@ -343,6 +396,7 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P
torsionParamArray[i][2] = (RealOpenMM) periodicity;
}
bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions();
}
double CpuCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -350,6 +404,8 @@ double CpuCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool inc
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
ReferenceProperDihedralBond periodicTorsionBond;
if (usePeriodic)
periodicTorsionBond.setPeriodic(extractBoxVectors(context));
bondForce.calculateForce(posData, torsionParamArray, forceData, includeEnergy ? &energy : NULL, periodicTorsionBond);
return energy;
}
......@@ -407,6 +463,7 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi
torsionParamArray[i][5] = (RealOpenMM) c5;
}
bondForce.initialize(system.getNumParticles(), numTorsions, 4, torsionIndexArray, data.threads);
usePeriodic = force.usesPeriodicBoundaryConditions();
}
double CpuCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -414,6 +471,8 @@ double CpuCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeFo
vector<RealVec>& forceData = extractForces(context);
RealOpenMM energy = 0;
ReferenceRbDihedralBond rbTorsionBond;
if (usePeriodic)
rbTorsionBond.setPeriodic(extractBoxVectors(context));
bondForce.calculateForce(posData, torsionParamArray, forceData, includeEnergy ? &energy : NULL, rbTorsionBond);
return energy;
}
......@@ -464,15 +523,11 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce* createCpuNonbondedForceVec8();
CpuCalcNonbondedForceKernel::CpuCalcNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) : CalcNonbondedForceKernel(name, platform),
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), neighborList(NULL), nonbonded(NULL) {
if (isVec8Supported()) {
neighborList = new CpuNeighborList(8);
data(data), bonded14IndexArray(NULL), bonded14ParamArray(NULL), hasInitializedPme(false), nonbonded(NULL) {
if (isVec8Supported())
nonbonded = createCpuNonbondedForceVec8();
}
else {
neighborList = new CpuNeighborList(4);
else
nonbonded = createCpuNonbondedForceVec4();
}
}
CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
......@@ -486,8 +541,6 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
}
if (nonbonded != NULL)
delete nonbonded;
if (neighborList != NULL)
delete neighborList;
}
void CpuCalcNonbondedForceKernel::initialize(const System& system, const NonbondedForce& force) {
......@@ -547,6 +600,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
if (nonbondedMethod == NoCutoff)
useSwitchingFunction = false;
else {
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
useSwitchingFunction = force.getUseSwitchingFunction();
switchingDistance = force.getSwitchingDistance();
}
......@@ -569,7 +623,6 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
lastPositions.resize(numParticles, Vec3(1e10, 1e10, 1e10));
data.isPeriodic = (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME);
}
......@@ -596,53 +649,8 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
bool ewald = (nonbondedMethod == Ewald);
bool pme = (nonbondedMethod == PME);
if (nonbondedMethod != NoCutoff) {
// Determine whether we need to recompute the neighbor list.
double padding = 0.25*nonbondedCutoff;
bool needRecompute = false;
double closeCutoff2 = 0.25*padding*padding;
double farCutoff2 = 0.5*padding*padding;
int maxNumMoved = numParticles/10;
vector<int> moved;
for (int i = 0; i < numParticles; i++) {
RealVec delta = posData[i]-lastPositions[i];
double dist2 = delta.dot(delta);
if (dist2 > closeCutoff2) {
moved.push_back(i);
if (dist2 > farCutoff2 || moved.size() > maxNumMoved) {
needRecompute = true;
break;
}
}
}
if (!needRecompute && moved.size() > 0) {
// Some particles have moved further than half the padding distance. Look for pairs
// that are missing from the neighbor list.
int numMoved = moved.size();
double cutoff2 = nonbondedCutoff*nonbondedCutoff;
double paddedCutoff2 = (nonbondedCutoff+padding)*(nonbondedCutoff+padding);
for (int i = 1; i < numMoved && !needRecompute; i++)
for (int j = 0; j < i; j++) {
RealVec delta = posData[moved[i]]-posData[moved[j]];
if (delta.dot(delta) < cutoff2) {
// These particles should interact. See if they are in the neighbor list.
RealVec oldDelta = lastPositions[moved[i]]-lastPositions[moved[j]];
if (oldDelta.dot(oldDelta) > paddedCutoff2) {
needRecompute = true;
break;
}
}
}
}
if (needRecompute) {
neighborList->computeNeighborList(numParticles, posq, exclusions, boxVectors, data.isPeriodic, nonbondedCutoff+padding, data.threads);
lastPositions = posData;
}
nonbonded->setUseCutoff(nonbondedCutoff, *neighborList, rfDielectric);
}
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList, rfDielectric);
if (data.isPeriodic) {
RealVec* boxVectors = extractBoxVectors(context);
double minAllowedSize = 1.999999*nonbondedCutoff;
......@@ -739,7 +747,7 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
CpuCalcCustomNonbondedForceKernel::CpuCalcCustomNonbondedForceKernel(string name, const Platform& platform, CpuPlatform::PlatformData& data) :
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), neighborList(NULL), nonbonded(NULL) {
CalcCustomNonbondedForceKernel(name, platform), data(data), forceCopy(NULL), nonbonded(NULL) {
}
CpuCalcCustomNonbondedForceKernel::~CpuCalcCustomNonbondedForceKernel() {
......@@ -748,8 +756,6 @@ CpuCalcCustomNonbondedForceKernel::~CpuCalcCustomNonbondedForceKernel() {
delete[] particleParamArray[i];
delete[] particleParamArray;
}
if (neighborList != NULL)
delete neighborList;
if (nonbonded != NULL)
delete nonbonded;
if (forceCopy != NULL)
......@@ -786,7 +792,7 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
if (nonbondedMethod == NoCutoff)
useSwitchingFunction = false;
else {
neighborList = new CpuNeighborList(4);
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
useSwitchingFunction = force.getUseSwitchingFunction();
switchingDistance = force.getSwitchingDistance();
}
......@@ -852,10 +858,8 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
RealVec* boxVectors = extractBoxVectors(context);
double energy = 0;
bool periodic = (nonbondedMethod == CutoffPeriodic);
if (nonbondedMethod != NoCutoff) {
neighborList->computeNeighborList(numParticles, data.posq, exclusions, boxVectors, data.isPeriodic, nonbondedCutoff, data.threads);
nonbonded->setUseCutoff(nonbondedCutoff, *neighborList);
}
if (nonbondedMethod != NoCutoff)
nonbonded->setUseCutoff(nonbondedCutoff, *data.neighborList);
if (periodic) {
double minAllowedSize = 2*nonbondedCutoff;
if (boxVectors[0][0] < minAllowedSize || boxVectors[1][1] < minAllowedSize || boxVectors[2][2] < minAllowedSize)
......@@ -963,8 +967,6 @@ CpuCalcCustomGBForceKernel::~CpuCalcCustomGBForceKernel() {
delete[] particleParamArray[i];
delete[] particleParamArray;
}
if (neighborList != NULL)
delete neighborList;
if (ixn != NULL)
delete ixn;
}
......@@ -1012,10 +1014,8 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
globalParameterNames.push_back(force.getGlobalParameterName(i));
nonbondedMethod = CalcCustomGBForceKernel::NonbondedMethod(force.getNonbondedMethod());
nonbondedCutoff = (RealOpenMM) force.getCutoffDistance();
if (nonbondedMethod == NoCutoff)
neighborList = NULL;
else
neighborList = new CpuNeighborList(4);
if (nonbondedMethod != NoCutoff)
data.requestNeighborList(nonbondedCutoff, 0.25*nonbondedCutoff, true, exclusions);
// Create custom functions for the tabulated functions.
......@@ -1112,8 +1112,7 @@ double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFor
ixn->setPeriodic(extractBoxSize(context));
if (nonbondedMethod != NoCutoff) {
vector<set<int> > noExclusions(numParticles);
neighborList->computeNeighborList(numParticles, data.posq, exclusions, boxVectors, data.isPeriodic, nonbondedCutoff, data.threads);
ixn->setUseCutoff(nonbondedCutoff, *neighborList);
ixn->setUseCutoff(nonbondedCutoff, *data.neighborList);
}
map<string, double> globalParameters;
for (int i = 0; i < (int) globalParameterNames.size(); i++)
......
......@@ -225,38 +225,78 @@ public:
if (usePeriodic)
voxelIndex.y = (y < 0 ? y+ny : (y >= ny ? y-ny : y));
float boxy = floor((float) y/ny);
float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
// Identify the range of atoms within this bin we need to search. When using periodic boundary
// conditions, there may be two separate ranges.
float minx = centerPos[0];
float maxx = centerPos[0];
fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0);
for (int k = 0; k < (int) blockAtoms.size(); k++) {
const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
fvec4 posVec(atomPos);
fvec4 delta1 = offset-posVec;
fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
if (usePeriodic) {
delta1 -= round(delta1*invBoxSize)*boxSize;
delta2 -= round(delta2*invBoxSize)*boxSize;
if (usePeriodic && triclinic) {
for (int k = 0; k < (int) blockAtoms.size(); k++) {
const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
fvec4 delta1(0, voxelSizeY*voxelIndex.y-atomPos[1], voxelSizeZ*voxelIndex.z-atomPos[2], 0);
fvec4 delta2 = delta1+fvec4(0, voxelSizeY, 0, 0);
fvec4 delta3 = delta1+fvec4(0, 0, voxelSizeZ, 0);
fvec4 delta4 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
delta1 -= periodicBoxVec4[2]*floorf(delta1[2]*recipBoxSize[2]+0.5f);
delta1 -= periodicBoxVec4[1]*floorf(delta1[1]*recipBoxSize[1]+0.5f);
delta1 -= periodicBoxVec4[0]*floorf(delta1[0]*recipBoxSize[0]+0.5f);
delta2 -= periodicBoxVec4[2]*floorf(delta2[2]*recipBoxSize[2]+0.5f);
delta2 -= periodicBoxVec4[1]*floorf(delta2[1]*recipBoxSize[1]+0.5f);
delta2 -= periodicBoxVec4[0]*floorf(delta2[0]*recipBoxSize[0]+0.5f);
delta3 -= periodicBoxVec4[2]*floorf(delta3[2]*recipBoxSize[2]+0.5f);
delta3 -= periodicBoxVec4[1]*floorf(delta3[1]*recipBoxSize[1]+0.5f);
delta3 -= periodicBoxVec4[0]*floorf(delta3[0]*recipBoxSize[0]+0.5f);
delta4 -= periodicBoxVec4[2]*floorf(delta4[2]*recipBoxSize[2]+0.5f);
delta4 -= periodicBoxVec4[1]*floorf(delta4[1]*recipBoxSize[1]+0.5f);
delta4 -= periodicBoxVec4[0]*floorf(delta4[0]*recipBoxSize[0]+0.5f);
if (delta1[1] < 0 && delta1[1]+voxelSizeY > 0)
delta1 = fvec4(delta1[0], 0, delta1[2], 0);
if (delta1[2] < 0 && delta1[2]+voxelSizeZ > 0)
delta1 = fvec4(delta1[0], delta1[1], 0, 0);
if (delta3[1] < 0 && delta3[1]+voxelSizeY > 0)
delta3 = fvec4(delta3[0], 0, delta3[2], 0);
if (delta2[2] < 0 && delta2[2]+voxelSizeZ > 0)
delta2 = fvec4(delta2[0], delta2[1], 0, 0);
fvec4 delta = min(min(min(abs(delta1), abs(delta2)), abs(delta3)), abs(delta4));
float dy = (voxelIndex.y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
float dz = (voxelIndex.z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
float dist2 = maxDistanceSquared-dy*dy-dz*dz;
if (dist2 > 0) {
float dist = sqrtf(dist2);
minx = min(minx, atomPos[0]-dist-max(max(max(delta1[0], delta2[0]), delta3[0]), delta4[0]));
maxx = max(maxx, atomPos[0]+dist-min(min(min(delta1[0], delta2[0]), delta3[0]), delta4[0]));
}
}
fvec4 delta = min(abs(delta1), abs(delta2));
float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
float dist2 = maxDistanceSquared-dy*dy-dz*dz;
if (dist2 > 0) {
float dist = sqrtf(dist2);
minx = min(minx, atomPos[0]-dist-xoffset);
maxx = max(maxx, atomPos[0]+dist-xoffset);
}
else {
float xoffset = (float) (usePeriodic ? boxy*periodicBoxVectors[1][0]+boxz*periodicBoxVectors[2][0] : 0);
fvec4 offset(-xoffset, -yoffset+voxelSizeY*y+(usePeriodic ? 0.0f : miny), voxelSizeZ*z+(usePeriodic ? 0.0f : minz), 0);
for (int k = 0; k < (int) blockAtoms.size(); k++) {
const float* atomPos = &sortedPositions[4*(blockSize*blockIndex+k)];
fvec4 posVec(atomPos);
fvec4 delta1 = offset-posVec;
fvec4 delta2 = delta1+fvec4(0, voxelSizeY, voxelSizeZ, 0);
if (usePeriodic) {
delta1 -= round(delta1*invBoxSize)*boxSize;
delta2 -= round(delta2*invBoxSize)*boxSize;
}
fvec4 delta = min(abs(delta1), abs(delta2));
float dy = (y == atomVoxelIndex[k].y ? 0.0f : delta[1]);
float dz = (z == atomVoxelIndex[k].z ? 0.0f : delta[2]);
float dist2 = maxDistanceSquared-dy*dy-dz*dz;
if (dist2 > 0) {
float dist = sqrtf(dist2);
minx = min(minx, atomPos[0]-dist-xoffset);
maxx = max(maxx, atomPos[0]+dist-xoffset);
}
}
}
if (minx == maxx)
continue;
bool needPeriodic = (centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
centerPos[2]-blockWidth[2] < maxDistance || centerPos[2]+blockWidth[2] > periodicBoxSize[2]-maxDistance ||
minx < 0.0f || maxx > periodicBoxVectors[0][0]);
bool needPeriodic = usePeriodic && (centerPos[1]-blockWidth[1] < maxDistance || centerPos[1]+blockWidth[1] > periodicBoxSize[1]-maxDistance ||
centerPos[2]-blockWidth[2] < maxDistance || centerPos[2]+blockWidth[2] > periodicBoxSize[2]-maxDistance ||
minx < 0.0f || maxx > periodicBoxVectors[0][0]);
int numRanges;
int rangeStart[2];
int rangeEnd[2];
......@@ -294,7 +334,7 @@ public:
continue;
fvec4 atomPos(&sortedPositions[4*sortedIndex]);
fvec4 delta = atomPos-centerPos;
fvec4 delta = atomPos-blockCenter;
if (periodicRectangular)
delta -= round(delta*invBoxSize)*boxSize;
else if (needPeriodic) {
......@@ -468,6 +508,10 @@ int CpuNeighborList::getNumBlocks() const {
return sortedAtoms.size()/blockSize;
}
int CpuNeighborList::getBlockSize() const {
return blockSize;
}
const std::vector<int>& CpuNeighborList::getSortedAtoms() const {
return sortedAtoms;
}
......
......@@ -34,6 +34,7 @@
#include "CpuKernels.h"
#include "CpuSETTLE.h"
#include "ReferenceConstraints.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
#include <sstream>
......@@ -59,6 +60,7 @@ extern "C" OPENMM_EXPORT_CPU void registerPlatforms() {
map<const ContextImpl*, CpuPlatform::PlatformData*> CpuPlatform::contextData;
CpuPlatform::CpuPlatform() {
deprecatedPropertyReplacements["CpuThreads"] = CpuThreads();
CpuKernelFactory* factory = new CpuKernelFactory();
registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
registerKernelFactory(CalcHarmonicAngleForceKernel::Name(), factory);
......@@ -84,7 +86,10 @@ CpuPlatform::CpuPlatform() {
const string& CpuPlatform::getPropertyValue(const Context& context, const string& property) const {
const ContextImpl& impl = getContextImpl(context);
const PlatformData& data = getPlatformData(impl);
map<string, string>::const_iterator value = data.propertyValues.find(property);
string propertyName = property;
if (deprecatedPropertyReplacements.find(property) != deprecatedPropertyReplacements.end())
propertyName = deprecatedPropertyReplacements.find(property)->second;
map<string, string>::const_iterator value = data.propertyValues.find(propertyName);
if (value != data.propertyValues.end())
return value->second;
return ReferencePlatform::getPropertyValue(context, property);
......@@ -132,7 +137,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
return *contextData[&context];
}
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq(4*numParticles), threads(numThreads) {
CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq(4*numParticles), threads(numThreads),
neighborList(NULL), cutoff(0.0), paddedCutoff(0.0), anyExclusions(false) {
numThreads = threads.getNumThreads();
threadForce.resize(numThreads);
for (int i = 0; i < numThreads; i++)
......@@ -142,3 +148,27 @@ CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq
threadsProperty << numThreads;
propertyValues[CpuThreads()] = threadsProperty.str();
}
CpuPlatform::PlatformData::~PlatformData() {
if (neighborList != NULL)
delete neighborList;
}
bool isVec8Supported();
void CpuPlatform::PlatformData::requestNeighborList(double cutoffDistance, double padding, bool useExclusions, vector<set<int> >& exclusionList) {
if (neighborList == NULL)
neighborList = new CpuNeighborList(isVec8Supported() ? 8 : 4);
if (cutoffDistance > cutoff)
cutoff = cutoffDistance;
if (cutoffDistance+padding > paddedCutoff)
paddedCutoff = cutoffDistance+padding;
if (useExclusions) {
if (anyExclusions && exclusions != exclusionList)
throw OpenMMException("All Forces must have identical exclusions");
else {
exclusions = exclusionList;
anyExclusions = true;
}
}
}
......@@ -53,14 +53,14 @@ void testNeighborList(bool periodic, bool triclinic) {
const float cutoff = 2.0f;
RealVec boxVectors[3];
if (triclinic) {
boxVectors[0] = RealVec(20, 0, 0);
boxVectors[1] = RealVec(5, 15, 0);
boxVectors[2] = RealVec(-3, -7, 22);
boxVectors[0] = RealVec(10, 0, 0);
boxVectors[1] = RealVec(4, 9, 0);
boxVectors[2] = RealVec(-3, -3.5, 11);
}
else {
boxVectors[0] = RealVec(20, 0, 0);
boxVectors[1] = RealVec(0, 15, 0);
boxVectors[2] = RealVec(0, 0, 22);
boxVectors[0] = RealVec(10, 0, 0);
boxVectors[1] = RealVec(0, 9, 0);
boxVectors[2] = RealVec(0, 0, 11);
}
const float boxSize[3] = {(float) boxVectors[0][0], (float) boxVectors[1][1], (float) boxVectors[2][2]};
const int blockSize = 8;
......
......@@ -178,7 +178,7 @@ public:
* @param b the vector defining the second edge of the periodic box
* @param c the vector defining the third edge of the periodic box
*/
void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c) const;
void setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c);
/**
* Create a checkpoint recording the current state of the Context.
*
......@@ -718,7 +718,7 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource);
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
CudaContext& cu;
CudaParameterSet* params;
CudaArray* globals;
......
......@@ -58,35 +58,35 @@ public:
* This is the name of the parameter for selecting which CUDA device or devices to use.
*/
static const std::string& CudaDeviceIndex() {
static const std::string key = "CudaDeviceIndex";
static const std::string key = "DeviceIndex";
return key;
}
/**
* This is the name of the parameter that reports the CUDA device or devices being used.
*/
static const std::string& CudaDeviceName() {
static const std::string key = "CudaDeviceName";
static const std::string key = "DeviceName";
return key;
}
/**
* This is the name of the parameter for selecting whether CUDA should sync or spin loop while waiting for results.
*/
static const std::string& CudaUseBlockingSync() {
static const std::string key = "CudaUseBlockingSync";
static const std::string key = "UseBlockingSync";
return key;
}
/**
* This is the name of the parameter for selecting what numerical precision to use.
*/
static const std::string& CudaPrecision() {
static const std::string key = "CudaPrecision";
static const std::string key = "Precision";
return key;
}
/**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/
static const std::string& CudaUseCpuPme() {
static const std::string key = "CudaUseCpuPme";
static const std::string key = "UseCpuPme";
return key;
}
/**
......@@ -107,14 +107,21 @@ public:
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
static const std::string& CudaTempDirectory() {
static const std::string key = "CudaTempDirectory";
static const std::string key = "TempDirectory";
return key;
}
/**
* This is the name of the parameter for selecting whether to disable use of a separate stream for PME.
*/
static const std::string& CudaDisablePmeStream() {
static const std::string key = "CudaDisablePmeStream";
static const std::string key = "DisablePmeStream";
return key;
}
/**
* This is the name of the parameter for requesting that force computations be fully deterministic.
*/
static const std::string& CudaDeterministicForces() {
static const std::string key = "DeterministicForces";
return key;
}
};
......@@ -123,14 +130,14 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData(ContextImpl* context, const System& system, const std::string& deviceIndexProperty, const std::string& blockingProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& compilerProperty, const std::string& tempProperty, const std::string& hostCompilerProperty,
const std::string& pmeStreamProperty, int numThreads);
const std::string& pmeStreamProperty, const std::string& deterministicForcesProperty, int numThreads);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
ContextImpl* context;
std::vector<CudaContext*> contexts;
std::vector<double> contextEnergy;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream;
bool hasInitializedContexts, removeCM, peerAccessSupported, useCpuPme, disablePmeStream, deterministicForces;
int cmMotionFrequency;
int stepCount, computeForceCount;
double time;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -106,7 +106,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
useMixedPrecision = false;
}
else
throw OpenMMException("Illegal value for CudaPrecision: "+precision);
throw OpenMMException("Illegal value for Precision: "+precision);
char* cacheVariable = getenv("OPENMM_CACHE_DIR");
cacheDir = (cacheVariable == NULL ? tempDir : string(cacheVariable));
#ifdef WIN32
......@@ -121,7 +121,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string errorMessage = "Error initializing Context";
CHECK_RESULT(cuDeviceGetCount(&numDevices));
if (deviceIndex < -1 || deviceIndex >= numDevices)
throw OpenMMException("Illegal value for CudaDeviceIndex: "+intToString(deviceIndex));
throw OpenMMException("Illegal value for DeviceIndex: "+intToString(deviceIndex));
vector<int> devicePrecedence;
if (deviceIndex == -1) {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2015 Stanford University and the Authors. *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -254,7 +254,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
for (int k = 3; k >= 0; k--)
for (int m = 0; m < 4; m++) {
int base = 4*m;
string suffix = suffixes[m];
string suffix = suffixes[k];
out << "derivy[" << m << "] = da*derivy[" << m << "] + (3*c[" << (base+3) << "]" << suffix << "*db + 2*c[" << (base+2) << "]" << suffix << ")*db + c[" << (base+1) << "]" << suffix << ";\n";
}
out << nodeNames[j] << " = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));\n";
......@@ -271,7 +271,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << nodeNames[j] << " *= " << paramsFloat[11] << ";\n";
}
else
throw OpenMMException("Unsupported derivative order for Continuous2DFunction");
throw OpenMMException("Unsupported derivative order for Continuous3DFunction");
}
out << "}\n";
}
......
This diff is collapsed.
......@@ -62,6 +62,14 @@ extern "C" OPENMM_EXPORT_CUDA void registerPlatforms() {
#endif
CudaPlatform::CudaPlatform() {
deprecatedPropertyReplacements["CudaDeviceIndex"] = CudaDeviceIndex();
deprecatedPropertyReplacements["CudaDeviceName"] = CudaDeviceName();
deprecatedPropertyReplacements["CudaUseBlockingSync"] = CudaUseBlockingSync();
deprecatedPropertyReplacements["CudaPrecision"] = CudaPrecision();
deprecatedPropertyReplacements["CudaUseCpuPme"] = CudaUseCpuPme();
deprecatedPropertyReplacements["CudaTempDirectory"] = CudaTempDirectory();
deprecatedPropertyReplacements["CudaDisablePmeStream"] = CudaDisablePmeStream();
deprecatedPropertyReplacements["CudaDeterministicForces"] = CudaDeterministicForces();
CudaKernelFactory* factory = new CudaKernelFactory();
registerKernelFactory(CalcForcesAndEnergyKernel::Name(), factory);
registerKernelFactory(UpdateStateDataKernel::Name(), factory);
......@@ -102,12 +110,14 @@ CudaPlatform::CudaPlatform() {
platformProperties.push_back(CudaTempDirectory());
platformProperties.push_back(CudaHostCompiler());
platformProperties.push_back(CudaDisablePmeStream());
platformProperties.push_back(CudaDeterministicForces());
setPropertyDefaultValue(CudaDeviceIndex(), "");
setPropertyDefaultValue(CudaDeviceName(), "");
setPropertyDefaultValue(CudaUseBlockingSync(), "true");
setPropertyDefaultValue(CudaPrecision(), "single");
setPropertyDefaultValue(CudaUseCpuPme(), "false");
setPropertyDefaultValue(CudaDisablePmeStream(), "false");
setPropertyDefaultValue(CudaDeterministicForces(), "false");
#ifdef _MSC_VER
char* bindir = getenv("CUDA_BIN_PATH");
string nvcc = (bindir == NULL ? "nvcc.exe" : string(bindir)+"\\nvcc.exe");
......@@ -142,7 +152,10 @@ bool CudaPlatform::supportsDoublePrecision() const {
const string& CudaPlatform::getPropertyValue(const Context& context, const string& property) const {
const ContextImpl& impl = getContextImpl(context);
const PlatformData* data = reinterpret_cast<const PlatformData*>(impl.getPlatformData());
map<string, string>::const_iterator value = data->propertyValues.find(property);
string propertyName = property;
if (deprecatedPropertyReplacements.find(property) != deprecatedPropertyReplacements.end())
propertyName = deprecatedPropertyReplacements.find(property)->second;
map<string, string>::const_iterator value = data->propertyValues.find(propertyName);
if (value != data->propertyValues.end())
return value->second;
return Platform::getPropertyValue(context, property);
......@@ -168,10 +181,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue(CudaHostCompiler()) : properties.find(CudaHostCompiler())->second);
string pmeStreamPropValue = (properties.find(CudaDisablePmeStream()) == properties.end() ?
getPropertyDefaultValue(CudaDisablePmeStream()) : properties.find(CudaDisablePmeStream())->second);
string deterministicForcesValue = (properties.find(CudaDeterministicForces()) == properties.end() ?
getPropertyDefaultValue(CudaDeterministicForces()) : properties.find(CudaDeterministicForces())->second);
transform(blockingPropValue.begin(), blockingPropValue.end(), blockingPropValue.begin(), ::tolower);
transform(precisionPropValue.begin(), precisionPropValue.end(), precisionPropValue.begin(), ::tolower);
transform(cpuPmePropValue.begin(), cpuPmePropValue.end(), cpuPmePropValue.begin(), ::tolower);
transform(pmeStreamPropValue.begin(), pmeStreamPropValue.end(), pmeStreamPropValue.begin(), ::tolower);
transform(deterministicForcesValue.begin(), deterministicForcesValue.end(), deterministicForcesValue.begin(), ::tolower);
vector<string> pmeKernelName;
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
......@@ -180,7 +196,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue, hostCompilerPropValue, pmeStreamPropValue, threads));
context.setPlatformData(new PlatformData(&context, context.getSystem(), devicePropValue, blockingPropValue, precisionPropValue, cpuPmePropValue, compilerPropValue, tempPropValue,
hostCompilerPropValue, pmeStreamPropValue, deterministicForcesValue, threads));
}
void CudaPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -189,7 +206,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& system, const string& deviceIndexProperty, const string& blockingProperty, const string& precisionProperty,
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty, int numThreads) :
const string& cpuPmeProperty, const string& compilerProperty, const string& tempProperty, const string& hostCompilerProperty, const string& pmeStreamProperty,
const string& deterministicForcesProperty, int numThreads) :
context(context), removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
bool blocking = (blockingProperty == "true");
vector<string> devices;
......@@ -230,6 +248,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
}
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true");
propertyValues[CudaPlatform::CudaDeviceIndex()] = deviceIndex.str();
propertyValues[CudaPlatform::CudaDeviceName()] = deviceName.str();
propertyValues[CudaPlatform::CudaUseBlockingSync()] = blocking ? "true" : "false";
......@@ -239,6 +258,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues[CudaPlatform::CudaTempDirectory()] = tempProperty;
propertyValues[CudaPlatform::CudaHostCompiler()] = hostCompilerProperty;
propertyValues[CudaPlatform::CudaDisablePmeStream()] = disablePmeStream ? "true" : "false";
propertyValues[CudaPlatform::CudaDeterministicForces()] = deterministicForces ? "true" : "false";
contextEnergy.resize(contexts.size());
// Determine whether peer-to-peer copying is supported, and enable it if so.
......
real3 v0 = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
real3 v1 = make_real3(pos2.x-pos3.x, pos2.y-pos3.y, pos2.z-pos3.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
#endif
real3 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f);
......
real3 delta = make_real3(pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0) ? (dEdR / r) : 0;
......
......@@ -5,6 +5,11 @@ const real PI = (real) 3.14159265358979323846;
real3 v0a = make_real3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
real3 v1a = make_real3(pos3.x-pos2.x, pos3.y-pos2.y, pos3.z-pos2.z);
real3 v2a = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0a)
APPLY_PERIODIC_TO_DELTA(v1a)
APPLY_PERIODIC_TO_DELTA(v2a)
#endif
real3 cp0a = cross(v0a, v1a);
real3 cp1a = cross(v1a, v2a);
real cosangle = dot(normalize(cp0a), normalize(cp1a));
......@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real3 v0b = make_real3(pos5.x-pos6.x, pos5.y-pos6.y, pos5.z-pos6.z);
real3 v1b = make_real3(pos7.x-pos6.x, pos7.y-pos6.y, pos7.z-pos6.z);
real3 v2b = make_real3(pos7.x-pos8.x, pos7.y-pos8.y, pos7.z-pos8.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0b)
APPLY_PERIODIC_TO_DELTA(v1b)
APPLY_PERIODIC_TO_DELTA(v2b)
#endif
real3 cp0b = cross(v0b, v1b);
real3 cp1b = cross(v1b, v2b);
cosangle = dot(normalize(cp0b), normalize(cp1b));
......
......@@ -66,8 +66,11 @@ inline __device__ real3 trim(real4 v) {
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 delta(real4 vec1, real4 vec2) {
inline __device__ real4 delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
......@@ -105,7 +108,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the forces on groups based on the bonds.
*/
extern "C" __global__ void computeGroupForces(unsigned long long* __restrict__ groupForce, mixed* __restrict__ energyBuffer, const real4* __restrict__ centerPositions,
const int* __restrict__ bondGroups
const int* __restrict__ bondGroups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
EXTRA_ARGS) {
mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < NUM_BONDS; index += blockDim.x*gridDim.x) {
......
......@@ -8,8 +8,11 @@ inline __device__ real3 ccb_trim(real4 v) {
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline __device__ real4 ccb_delta(real4 vec1, real4 vec2) {
inline __device__ real4 ccb_delta(real4 vec1, real4 vec2, bool periodic, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = make_real4(vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
if (periodic)
APPLY_PERIODIC_TO_DELTA(result);
result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result;
}
......
......@@ -59,7 +59,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/**
* Determine whether a particular interaction is in the list of exclusions.
*/
inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex) {
inline __device__ bool isInteractionExcluded(int atom1, int atom2, const int* __restrict__ exclusions, const int* __restrict__ exclusionStartIndex) {
int first = exclusionStartIndex[atom1];
int last = exclusionStartIndex[atom1+1];
for (int i = last-1; i >= first; i--) {
......@@ -180,7 +180,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
const real4* __restrict__ posq, const real4* __restrict__ blockCenter, const real4* __restrict__ blockBoundingBox, int2* __restrict__ neighborPairs,
int* __restrict__ numNeighborPairs, int* __restrict__ numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS
, int* __restrict__ exclusions, int* __restrict__ exclusionStartIndex
, const int* __restrict__ exclusions, const int* __restrict__ exclusionStartIndex
#endif
) {
__shared__ real3 positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE];
......@@ -265,7 +265,8 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
}
}
}
numNeighborsForAtom[atom1] = totalNeighborsForAtom1;
if (atom1 < NUM_ATOMS)
numNeighborsForAtom[atom1] = totalNeighborsForAtom1;
}
}
......@@ -308,6 +309,7 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
}
globalOffset += posBuffer[blockDim.x-1];
__syncthreads();
}
if (threadIdx.x == 0)
neighborStartIndex[0] = 0;
......
......@@ -83,7 +83,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
#elif __CUDA_ARCH__ < 200
#elif __CUDA_ARCH__ < 200 || defined(USE_DETERMINISTIC_FORCES)
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
int gridIndex = index;
gridIndex = (gridIndex%2 == 0 ? gridIndex/2 : (gridIndex+GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z)/2);
......
......@@ -2,6 +2,11 @@ const real PI = (real) 3.14159265358979323846;
real3 v0 = make_real3(pos1.x-pos2.x, pos1.y-pos2.y, pos1.z-pos2.z);
real3 v1 = make_real3(pos3.x-pos2.x, pos3.y-pos2.y, pos3.z-pos2.z);
real3 v2 = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
APPLY_PERIODIC_TO_DELTA(v2)
#endif
real3 cp0 = cross(v0, v1);
real3 cp1 = cross(v1, v2);
real cosangle = dot(normalize(cp0), normalize(cp1));
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors. *
* Portions copyright (c) 2015-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -39,5 +39,5 @@ OpenMM::CudaPlatform platform;
void initializeTests(int argc, char* argv[]) {
if (argc > 1)
platform.setPropertyDefaultValue("CudaPrecision", std::string(argv[1]));
platform.setPropertyDefaultValue("Precision", std::string(argv[1]));
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment