Commit 1ebe88ba authored by Robert McGibbon
Browse files

merge

parents 804cbb22 a37dbc96
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <set>
#include <sstream> #include <sstream>
#include <typeinfo> #include <typeinfo>
...@@ -492,13 +493,32 @@ void OpenCLContext::addForce(OpenCLForceInfo* force) { ...@@ -492,13 +493,32 @@ void OpenCLContext::addForce(OpenCLForceInfo* force) {
} }
string OpenCLContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const { string OpenCLContext::replaceStrings(const string& input, const std::map<std::string, std::string>& replacements) const {
static set<char> symbolChars;
if (symbolChars.size() == 0) {
symbolChars.insert('_');
for (char c = 'a'; c <= 'z'; c++)
symbolChars.insert(c);
for (char c = 'A'; c <= 'Z'; c++)
symbolChars.insert(c);
for (char c = '0'; c <= '9'; c++)
symbolChars.insert(c);
}
string result = input; string result = input;
for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) { for (map<string, string>::const_iterator iter = replacements.begin(); iter != replacements.end(); iter++) {
int index = -1; int index = 0;
int size = iter->first.size();
do { do {
index = result.find(iter->first); index = result.find(iter->first, index);
if (index != result.npos) if (index != result.npos) {
result.replace(index, iter->first.size(), iter->second); if ((index == 0 || symbolChars.find(result[index-1]) == symbolChars.end()) && (index == result.size()-size || symbolChars.find(result[index+size]) == symbolChars.end())) {
// We have found a complete symbol, not part of a longer symbol.
result.replace(index, size, iter->second);
index += iter->second.size();
}
else
index++;
}
} while (index != result.npos); } while (index != result.npos);
} }
return result; return result;
...@@ -1130,7 +1150,7 @@ void OpenCLContext::reorderAtomsImpl() { ...@@ -1130,7 +1150,7 @@ void OpenCLContext::reorderAtomsImpl() {
if (useHilbert) if (useHilbert)
binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0); binWidth = (Real) (max(max(maxx-minx, maxy-miny), maxz-minz)/255.0);
else else
binWidth = (Real) (0.2*nonbonded->getCutoffDistance()); binWidth = (Real) (0.2*nonbonded->getMaxCutoffDistance());
Real invBinWidth = (Real) (1.0/binWidth); Real invBinWidth = (Real) (1.0/binWidth);
int xbins = 1 + (int) ((maxx-minx)*invBinWidth); int xbins = 1 + (int) ((maxx-minx)*invBinWidth);
int ybins = 1 + (int) ((maxy-miny)*invBinWidth); int ybins = 1 + (int) ((maxy-miny)*invBinWidth);
......
...@@ -121,16 +121,13 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo ...@@ -121,16 +121,13 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
for (vector<OpenCLContext::ForcePreComputation*>::iterator iter = cl.getPreComputations().begin(); iter != cl.getPreComputations().end(); ++iter) for (vector<OpenCLContext::ForcePreComputation*>::iterator iter = cl.getPreComputations().begin(); iter != cl.getPreComputations().end(); ++iter)
(*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups); (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities(); OpenCLNonbondedUtilities& nb = cl.getNonbondedUtilities();
bool includeNonbonded = ((groups&(1<<nb.getForceGroup())) != 0);
cl.setComputeForceCount(cl.getComputeForceCount()+1); cl.setComputeForceCount(cl.getComputeForceCount()+1);
if (includeNonbonded) nb.prepareInteractions(groups);
nb.prepareInteractions();
} }
double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) { double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForces, bool includeEnergy, int groups, bool& valid) {
cl.getBondedUtilities().computeInteractions(groups); cl.getBondedUtilities().computeInteractions(groups);
if ((groups&(1<<cl.getNonbondedUtilities().getForceGroup())) != 0) cl.getNonbondedUtilities().computeInteractions(groups);
cl.getNonbondedUtilities().computeInteractions();
double sum = 0.0; double sum = 0.0;
for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter) for (vector<OpenCLContext::ForcePostComputation*>::iterator iter = cl.getPostComputations().begin(); iter != cl.getPostComputations().end(); ++iter)
sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups); sum += (*iter)->computeForceAndEnergy(includeForces, includeEnergy, groups);
...@@ -2643,8 +2640,9 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB ...@@ -2643,8 +2640,9 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy(); surfaceAreaFactor = -6.0*4*M_PI*force.getSurfaceAreaEnergy();
bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff);
bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic); bool usePeriodic = (force.getNonbondedMethod() != GBSAOBCForce::NoCutoff && force.getNonbondedMethod() != GBSAOBCForce::CutoffNonPeriodic);
cutoff = force.getCutoffDistance();
string source = OpenCLKernelSources::gbsaObc2; string source = OpenCLKernelSources::gbsaObc2;
nb.addInteraction(useCutoff, usePeriodic, false, force.getCutoffDistance(), vector<vector<int> >(), source, force.getForceGroup()); nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDeviceBuffer()));; nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(cl_float2), params->getDeviceBuffer()));;
nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce->getDeviceBuffer()));; nb.addParameter(OpenCLNonbondedUtilities::ParameterInfo("bornForce", "real", 1, elementSize, bornForce->getDeviceBuffer()));;
cl.addForce(new OpenCLGBSAOBCForceInfo(nb.getNumForceBuffers(), force)); cl.addForce(new OpenCLGBSAOBCForceInfo(nb.getNumForceBuffers(), force));
...@@ -2663,8 +2661,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2663,8 +2661,8 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (nb.getUsePeriodic()) if (nb.getUsePeriodic())
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["CUTOFF_SQUARED"] = cl.doubleToString(nb.getCutoffDistance()*nb.getCutoffDistance()); defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
defines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance()); defines["CUTOFF"] = cl.doubleToString(cutoff);
defines["PREFACTOR"] = cl.doubleToString(prefactor); defines["PREFACTOR"] = cl.doubleToString(prefactor);
defines["SURFACE_AREA_FACTOR"] = cl.doubleToString(surfaceAreaFactor); defines["SURFACE_AREA_FACTOR"] = cl.doubleToString(surfaceAreaFactor);
defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); defines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
...@@ -2856,6 +2854,7 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() { ...@@ -2856,6 +2854,7 @@ OpenCLCalcCustomGBForceKernel::~OpenCLCalcCustomGBForceKernel() {
void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) { void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const CustomGBForce& force) {
if (cl.getPlatformData().contexts.size() > 1) if (cl.getPlatformData().contexts.size() > 1)
throw OpenMMException("CustomGBForce does not support using multiple OpenCL devices"); throw OpenMMException("CustomGBForce does not support using multiple OpenCL devices");
cutoff = force.getCutoffDistance();
bool useExclusionsForValue = false; bool useExclusionsForValue = false;
numComputedValues = force.getNumComputedValues(); numComputedValues = force.getNumComputedValues();
vector<string> computedValueNames(force.getNumComputedValues()); vector<string> computedValueNames(force.getNumComputedValues());
...@@ -3047,7 +3046,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3047,7 +3046,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (useExclusionsForValue) if (useExclusionsForValue)
pairValueDefines["USE_EXCLUSIONS"] = "1"; pairValueDefines["USE_EXCLUSIONS"] = "1";
pairValueDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()); pairValueDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
pairValueDefines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); pairValueDefines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
pairValueDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); pairValueDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
pairValueDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); pairValueDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
pairValueDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks()); pairValueDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
...@@ -3240,7 +3239,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3240,7 +3239,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
if (anyExclusions) if (anyExclusions)
pairEnergyDefines["USE_EXCLUSIONS"] = "1"; pairEnergyDefines["USE_EXCLUSIONS"] = "1";
pairEnergyDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()); pairEnergyDefines["FORCE_WORK_GROUP_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
pairEnergyDefines["CUTOFF_SQUARED"] = cl.doubleToString(force.getCutoffDistance()*force.getCutoffDistance()); pairEnergyDefines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
pairEnergyDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms()); pairEnergyDefines["NUM_ATOMS"] = cl.intToString(cl.getNumAtoms());
pairEnergyDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); pairEnergyDefines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
pairEnergyDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks()); pairEnergyDefines["NUM_BLOCKS"] = cl.intToString(cl.getNumAtomBlocks());
...@@ -3492,7 +3491,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo ...@@ -3492,7 +3491,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
globals->upload(globalParamValues); globals->upload(globalParamValues);
arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer())); arguments.push_back(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer()));
} }
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, force.getCutoffDistance(), exclusionList, source, force.getForceGroup()); cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, force.getNumExclusions() > 0, cutoff, exclusionList, source, force.getForceGroup());
for (int i = 0; i < (int) parameters.size(); i++) for (int i = 0; i < (int) parameters.size(); i++)
cl.getNonbondedUtilities().addParameter(parameters[i]); cl.getNonbondedUtilities().addParameter(parameters[i]);
for (int i = 0; i < (int) arguments.size(); i++) for (int i = 0; i < (int) arguments.size(); i++)
...@@ -3527,7 +3526,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3527,7 +3526,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts;
pairValueDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex); pairValueDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex);
pairValueDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex); pairValueDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex);
pairValueDefines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance()); pairValueDefines["CUTOFF"] = cl.doubleToString(cutoff);
cl::Program program = cl.createProgram(pairValueSrc, pairValueDefines); cl::Program program = cl.createProgram(pairValueSrc, pairValueDefines);
pairValueKernel = cl::Kernel(program, "computeN2Value"); pairValueKernel = cl::Kernel(program, "computeN2Value");
pairValueSrc = ""; pairValueSrc = "";
...@@ -3541,7 +3540,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3541,7 +3540,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cl.getContextIndex()+1)*numExclusionTiles/numContexts;
pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex); pairEnergyDefines["FIRST_EXCLUSION_TILE"] = cl.intToString(startExclusionIndex);
pairEnergyDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex); pairEnergyDefines["LAST_EXCLUSION_TILE"] = cl.intToString(endExclusionIndex);
pairEnergyDefines["CUTOFF"] = cl.doubleToString(nb.getCutoffDistance()); pairEnergyDefines["CUTOFF"] = cl.doubleToString(cutoff);
cl::Program program = cl.createProgram(pairEnergySrc, pairEnergyDefines); cl::Program program = cl.createProgram(pairEnergySrc, pairEnergyDefines);
pairEnergyKernel = cl::Kernel(program, "computeN2Energy"); pairEnergyKernel = cl::Kernel(program, "computeN2Energy");
pairEnergySrc = ""; pairEnergySrc = "";
......
...@@ -220,9 +220,9 @@ __kernel void computeNonbonded( ...@@ -220,9 +220,9 @@ __kernel void computeNonbonded(
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
x = tiles[pos]; x = tiles[pos];
real4 blockSizeX = blockSize[x]; real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF && singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= MAX_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= MAX_CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF); 0.5f*periodicBoxSize.z-blockSizeX.z >= MAX_CUTOFF);
} }
else else
#endif #endif
......
...@@ -973,6 +973,62 @@ void testInteractionGroupLongRangeCorrection() { ...@@ -973,6 +973,62 @@ void testInteractionGroupLongRangeCorrection() {
ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4); ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4);
} }
/**
 * Verify that two CustomNonbondedForces with different cutoff distances (2.5 and 2.9),
 * the second of which is assigned to force group 1, are each evaluated with their own cutoff.
 *
 * Two particles are placed at a series of separations r along the y axis. For each
 * separation the forces and energy are checked three times: with group mask 1 (the first
 * force), with group mask 2 (the second force), and with the default groups (both forces).
 * A force with energy k*r is expected to contribute energy k*r and a force of magnitude k
 * while r is below its cutoff, and nothing otherwise.
 */
void testMultipleCutoffs() {
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    VerletIntegrator integrator(0.01);

    // Add multiple nonbonded forces that have different cutoffs.

    CustomNonbondedForce* nonbonded1 = new CustomNonbondedForce("2*r");
    nonbonded1->addParticle(vector<double>());
    nonbonded1->addParticle(vector<double>());
    nonbonded1->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
    nonbonded1->setCutoffDistance(2.5);
    system.addForce(nonbonded1);
    CustomNonbondedForce* nonbonded2 = new CustomNonbondedForce("3*r");
    nonbonded2->addParticle(vector<double>());
    nonbonded2->addParticle(vector<double>());
    nonbonded2->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
    nonbonded2->setCutoffDistance(2.9);
    nonbonded2->setForceGroup(1);
    system.addForce(nonbonded2);
    Context context(system, integrator, platform);
    vector<Vec3> positions(2);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 0, 0);

    // Sample the separations 2.4, 2.6, 2.8 and 3.0. Each value is derived from an integer
    // index instead of accumulating 0.2 in a floating point loop counter, so the number of
    // iterations cannot change because of rounding error in the running sum.

    const int numSeparations = 4;
    for (int step = 0; step < numSeparations; step++) {
        const double r = 2.4 + 0.2*step;
        positions[1][1] = r;
        context.setPositions(positions);
        double e1 = (r < 2.5 ? 2.0*r : 0.0);
        double e2 = (r < 2.9 ? 3.0*r : 0.0);
        double f1 = (r < 2.5 ? 2.0 : 0.0);
        double f2 = (r < 2.9 ? 3.0 : 0.0);

        // Check the first force.

        State state = context.getState(State::Forces | State::Energy, false, 1);
        ASSERT_EQUAL_VEC(Vec3(0, f1, 0), state.getForces()[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(0, -f1, 0), state.getForces()[1], TOL);
        ASSERT_EQUAL_TOL(e1, state.getPotentialEnergy(), TOL);

        // Check the second force.

        state = context.getState(State::Forces | State::Energy, false, 2);
        ASSERT_EQUAL_VEC(Vec3(0, f2, 0), state.getForces()[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(0, -f2, 0), state.getForces()[1], TOL);
        ASSERT_EQUAL_TOL(e2, state.getPotentialEnergy(), TOL);

        // Check the sum of both forces.

        state = context.getState(State::Forces | State::Energy);
        ASSERT_EQUAL_VEC(Vec3(0, f1+f2, 0), state.getForces()[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(0, -f1-f2, 0), state.getForces()[1], TOL);
        ASSERT_EQUAL_TOL(e1+e2, state.getPotentialEnergy(), TOL);
    }
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
try { try {
if (argc > 1) if (argc > 1)
...@@ -997,6 +1053,7 @@ int main(int argc, char* argv[]) { ...@@ -997,6 +1053,7 @@ int main(int argc, char* argv[]) {
testInteractionGroups(); testInteractionGroups();
testLargeInteractionGroup(); testLargeInteractionGroup();
testInteractionGroupLongRangeCorrection(); testInteractionGroupLongRangeCorrection();
testMultipleCutoffs();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -33,21 +33,19 @@ ...@@ -33,21 +33,19 @@
#include "openmm/Vec3.h" #include "openmm/Vec3.h"
#include "openmm/internal/ThreadPool.h" #include "openmm/internal/ThreadPool.h"
#include <map> #include <map>
#include <utility>
using std::map;
using std::pair;
using std::vector;
using std::set;
using namespace OpenMM; using namespace OpenMM;
using namespace std;
// This class extracts columns from the inverse matrix one at a time. It is done in parallel, // This class extracts columns from the inverse matrix one at a time. It is done in parallel,
// since this can be very slow. // since this can be very slow.
class ExtractMatrixTask : public ThreadPool::Task { class ExtractMatrixTask : public ThreadPool::Task {
public: public:
ExtractMatrixTask(int numConstraints, vector<vector<pair<int, RealOpenMM> > >& matrix, const vector<RealOpenMM>& distance, RealOpenMM elementCutoff, ExtractMatrixTask(int numConstraints, vector<vector<pair<int, RealOpenMM> > >& transposedMatrix, const vector<RealOpenMM>& distance, RealOpenMM elementCutoff,
const int* qRowStart, const int* qColIndex, const int* rRowStart, const int* rColIndex, const double* qValue, const double* rValue) : const int* qRowStart, const int* qColIndex, const int* rRowStart, const int* rColIndex, const double* qValue, const double* rValue) :
numConstraints(numConstraints), matrix(matrix), distance(distance), elementCutoff(elementCutoff), qRowStart(qRowStart), qColIndex(qColIndex), numConstraints(numConstraints), transposedMatrix(transposedMatrix), distance(distance), elementCutoff(elementCutoff), qRowStart(qRowStart), qColIndex(qColIndex),
rRowStart(rRowStart), rColIndex(rColIndex), qValue(qValue), rValue(rValue) { rRowStart(rRowStart), rColIndex(rColIndex), qValue(qValue), rValue(rValue) {
} }
...@@ -61,15 +59,15 @@ public: ...@@ -61,15 +59,15 @@ public:
QUERN_multiply_with_q_transpose(numConstraints, qRowStart, qColIndex, qValue, &rhs[0]); QUERN_multiply_with_q_transpose(numConstraints, qRowStart, qColIndex, qValue, &rhs[0]);
QUERN_solve_with_r(numConstraints, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]); QUERN_solve_with_r(numConstraints, rRowStart, rColIndex, rValue, &rhs[0], &rhs[0]);
for (int j = 0; j < numConstraints; j++) { for (int j = 0; j < numConstraints; j++) {
double value = rhs[j]*distance[j]/distance[i]; double value = rhs[j]*distance[i]/distance[j];
if (FABS((RealOpenMM) value) > elementCutoff) if (FABS((RealOpenMM) value) > elementCutoff)
matrix[i].push_back(pair<int, RealOpenMM>(j, (RealOpenMM) value)); transposedMatrix[i].push_back(pair<int, RealOpenMM>(j, (RealOpenMM) value));
} }
} }
} }
private: private:
int numConstraints; int numConstraints;
vector<vector<pair<int, RealOpenMM> > >& matrix; vector<vector<pair<int, RealOpenMM> > >& transposedMatrix;
const vector<RealOpenMM>& distance; const vector<RealOpenMM>& distance;
RealOpenMM elementCutoff; RealOpenMM elementCutoff;
const int *qRowStart, *qColIndex, *rRowStart, *rColIndex; const int *qRowStart, *qColIndex, *rRowStart, *rColIndex;
...@@ -194,12 +192,21 @@ ReferenceCCMAAlgorithm::ReferenceCCMAAlgorithm(int numberOfAtoms, ...@@ -194,12 +192,21 @@ ReferenceCCMAAlgorithm::ReferenceCCMAAlgorithm(int numberOfAtoms,
double *qValue, *rValue; double *qValue, *rValue;
QUERN_compute_qr(numberOfConstraints, numberOfConstraints, &matrixRowStart[0], &matrixColIndex[0], &matrixValue[0], NULL, QUERN_compute_qr(numberOfConstraints, numberOfConstraints, &matrixRowStart[0], &matrixColIndex[0], &matrixValue[0], NULL,
&qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue); &qRowStart, &qColIndex, &qValue, &rRowStart, &rColIndex, &rValue);
vector<double> rhs(numberOfConstraints); vector<vector<pair<int, RealOpenMM> > > transposedMatrix(numberOfConstraints);
_matrix.resize(numberOfConstraints); _matrix.resize(numberOfConstraints);
ThreadPool threads; ThreadPool threads;
ExtractMatrixTask task(numberOfConstraints, _matrix, _distance, _elementCutoff, qRowStart, qColIndex, rRowStart, rColIndex, qValue, rValue); ExtractMatrixTask task(numberOfConstraints, transposedMatrix, _distance, _elementCutoff, qRowStart, qColIndex, rRowStart, rColIndex, qValue, rValue);
threads.execute(task); threads.execute(task);
threads.waitForThreads(); threads.waitForThreads();
// For purposes of thread safety we extracted the matrix in transposed form, so we need to transpose it again.
for (int i = 0; i < numberOfConstraints; i++) {
for (int j = 0; j < transposedMatrix[i].size(); j++) {
pair<int, RealOpenMM> value = transposedMatrix[i][j];
_matrix[value.first].push_back(make_pair(i, value.second));
}
}
QUERN_free_result(qRowStart, qColIndex, qValue); QUERN_free_result(qRowStart, qColIndex, qValue);
QUERN_free_result(rRowStart, rColIndex, rValue); QUERN_free_result(rRowStart, rColIndex, rValue);
} }
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008-2014 Stanford University and the Authors. * * Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -906,6 +906,62 @@ void testInteractionGroupLongRangeCorrection() { ...@@ -906,6 +906,62 @@ void testInteractionGroupLongRangeCorrection() {
ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4); ASSERT_EQUAL_TOL(expected, energy2-energy1, 1e-4);
} }
/**
 * Verify that two CustomNonbondedForces with different cutoff distances (2.5 and 2.9),
 * the second of which is assigned to force group 1, are each evaluated with their own cutoff.
 *
 * Two particles are placed at a series of separations r along the y axis. For each
 * separation the forces and energy are checked three times: with group mask 1 (the first
 * force), with group mask 2 (the second force), and with the default groups (both forces).
 * A force with energy k*r is expected to contribute energy k*r and a force of magnitude k
 * while r is below its cutoff, and nothing otherwise.
 */
void testMultipleCutoffs() {
    System system;
    system.addParticle(1.0);
    system.addParticle(1.0);
    VerletIntegrator integrator(0.01);

    // Add multiple nonbonded forces that have different cutoffs.

    CustomNonbondedForce* nonbonded1 = new CustomNonbondedForce("2*r");
    nonbonded1->addParticle(vector<double>());
    nonbonded1->addParticle(vector<double>());
    nonbonded1->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
    nonbonded1->setCutoffDistance(2.5);
    system.addForce(nonbonded1);
    CustomNonbondedForce* nonbonded2 = new CustomNonbondedForce("3*r");
    nonbonded2->addParticle(vector<double>());
    nonbonded2->addParticle(vector<double>());
    nonbonded2->setNonbondedMethod(CustomNonbondedForce::CutoffNonPeriodic);
    nonbonded2->setCutoffDistance(2.9);
    nonbonded2->setForceGroup(1);
    system.addForce(nonbonded2);
    Context context(system, integrator, platform);
    vector<Vec3> positions(2);
    positions[0] = Vec3(0, 0, 0);
    positions[1] = Vec3(0, 0, 0);

    // Sample the separations 2.4, 2.6, 2.8 and 3.0. Each value is derived from an integer
    // index instead of accumulating 0.2 in a floating point loop counter, so the number of
    // iterations cannot change because of rounding error in the running sum.

    const int numSeparations = 4;
    for (int step = 0; step < numSeparations; step++) {
        const double r = 2.4 + 0.2*step;
        positions[1][1] = r;
        context.setPositions(positions);
        double e1 = (r < 2.5 ? 2.0*r : 0.0);
        double e2 = (r < 2.9 ? 3.0*r : 0.0);
        double f1 = (r < 2.5 ? 2.0 : 0.0);
        double f2 = (r < 2.9 ? 3.0 : 0.0);

        // Check the first force.

        State state = context.getState(State::Forces | State::Energy, false, 1);
        ASSERT_EQUAL_VEC(Vec3(0, f1, 0), state.getForces()[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(0, -f1, 0), state.getForces()[1], TOL);
        ASSERT_EQUAL_TOL(e1, state.getPotentialEnergy(), TOL);

        // Check the second force.

        state = context.getState(State::Forces | State::Energy, false, 2);
        ASSERT_EQUAL_VEC(Vec3(0, f2, 0), state.getForces()[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(0, -f2, 0), state.getForces()[1], TOL);
        ASSERT_EQUAL_TOL(e2, state.getPotentialEnergy(), TOL);

        // Check the sum of both forces.

        state = context.getState(State::Forces | State::Energy);
        ASSERT_EQUAL_VEC(Vec3(0, f1+f2, 0), state.getForces()[0], TOL);
        ASSERT_EQUAL_VEC(Vec3(0, -f1-f2, 0), state.getForces()[1], TOL);
        ASSERT_EQUAL_TOL(e1+e2, state.getPotentialEnergy(), TOL);
    }
}
int main() { int main() {
try { try {
testSimpleExpression(); testSimpleExpression();
...@@ -926,6 +982,7 @@ int main() { ...@@ -926,6 +982,7 @@ int main() {
testInteractionGroups(); testInteractionGroups();
testLargeInteractionGroup(); testLargeInteractionGroup();
testInteractionGroupLongRangeCorrection(); testInteractionGroupLongRangeCorrection();
testMultipleCutoffs();
} }
catch(const exception& e) { catch(const exception& e) {
cout << "exception: " << e.what() << endl; cout << "exception: " << e.what() << endl;
......
...@@ -2407,7 +2407,7 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba ...@@ -2407,7 +2407,7 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
replacements["TAPER_C5"] = cu.doubleToString(6/pow(taperCutoff-cutoff, 5.0)); replacements["TAPER_C5"] = cu.doubleToString(6/pow(taperCutoff-cutoff, 5.0));
bool useCutoff = (force.getNonbondedMethod() != AmoebaVdwForce::NoCutoff); bool useCutoff = (force.getNonbondedMethod() != AmoebaVdwForce::NoCutoff);
nonbonded->addInteraction(useCutoff, useCutoff, true, force.getCutoff(), exclusions, nonbonded->addInteraction(useCutoff, useCutoff, true, force.getCutoff(), exclusions,
cu.replaceStrings(CudaAmoebaKernelSources::amoebaVdwForce2, replacements), force.getForceGroup()); cu.replaceStrings(CudaAmoebaKernelSources::amoebaVdwForce2, replacements), 0);
// Create the other kernels. // Create the other kernels.
...@@ -2429,8 +2429,8 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2429,8 +2429,8 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
void* prepareArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &tempPosq->getDevicePointer(), void* prepareArgs[] = {&cu.getForce().getDevicePointer(), &cu.getPosq().getDevicePointer(), &tempPosq->getDevicePointer(),
&bondReductionAtoms->getDevicePointer(), &bondReductionFactors->getDevicePointer()}; &bondReductionAtoms->getDevicePointer(), &bondReductionFactors->getDevicePointer()};
cu.executeKernel(prepareKernel, prepareArgs, cu.getPaddedNumAtoms()); cu.executeKernel(prepareKernel, prepareArgs, cu.getPaddedNumAtoms());
nonbonded->prepareInteractions(); nonbonded->prepareInteractions(1);
nonbonded->computeInteractions(); nonbonded->computeInteractions(1);
void* spreadArgs[] = {&cu.getForce().getDevicePointer(), &tempForces->getDevicePointer(), &bondReductionAtoms->getDevicePointer(), &bondReductionFactors->getDevicePointer()}; void* spreadArgs[] = {&cu.getForce().getDevicePointer(), &tempForces->getDevicePointer(), &bondReductionAtoms->getDevicePointer(), &bondReductionFactors->getDevicePointer()};
cu.executeKernel(spreadKernel, spreadArgs, cu.getPaddedNumAtoms()); cu.executeKernel(spreadKernel, spreadArgs, cu.getPaddedNumAtoms());
tempPosq->copyTo(cu.getPosq()); tempPosq->copyTo(cu.getPosq());
...@@ -2534,7 +2534,7 @@ void CudaCalcAmoebaWcaDispersionForceKernel::initialize(const System& system, co ...@@ -2534,7 +2534,7 @@ void CudaCalcAmoebaWcaDispersionForceKernel::initialize(const System& system, co
// just so that CudaNonbondedUtilities will keep track of the tiles. // just so that CudaNonbondedUtilities will keep track of the tiles.
vector<vector<int> > exclusions; vector<vector<int> > exclusions;
cu.getNonbondedUtilities().addInteraction(false, false, false, cu.getNonbondedUtilities().getCutoffDistance(), exclusions, "", force.getForceGroup()); cu.getNonbondedUtilities().addInteraction(false, false, false, 1.0, exclusions, "", force.getForceGroup());
cu.addForce(new ForceInfo(force)); cu.addForce(new ForceInfo(force));
} }
......
...@@ -18,6 +18,9 @@ FOREACH(TEST_PROG ${TEST_PROGS}) ...@@ -18,6 +18,9 @@ FOREACH(TEST_PROG ${TEST_PROGS})
IF ((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL)) IF ((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL))
SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1") SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -msse4.1")
ENDIF ((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL)) ENDIF ((${TEST_ROOT} MATCHES TestVectorize) AND NOT (MSVC OR ANDROID OR PNACL))
IF ((${TEST_ROOT} MATCHES TestVectorize8) AND NOT (MSVC OR ANDROID OR PNACL))
SET(EXTRA_TEST_FLAGS "${EXTRA_COMPILE_FLAGS} -mavx")
ENDIF ((${TEST_ROOT} MATCHES TestVectorize8) AND NOT (MSVC OR ANDROID OR PNACL))
SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_TEST_FLAGS}" COMPILE_FLAGS "${EXTRA_TEST_FLAGS}") SET_TARGET_PROPERTIES(${TEST_ROOT} PROPERTIES LINK_FLAGS "${EXTRA_TEST_FLAGS}" COMPILE_FLAGS "${EXTRA_TEST_FLAGS}")
ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT}) ADD_TEST(${TEST_ROOT} ${EXECUTABLE_OUTPUT_PATH}/${TEST_ROOT})
ENDFOREACH(TEST_PROG ${TEST_PROGS}) ENDFOREACH(TEST_PROG ${TEST_PROGS})
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Robert T. McGibbon *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests vectorized operations.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/vectorize8.h"
#include <cstring>
#include <iostream>
#ifdef __AVX__
/**
 * Check whether 8 component vectors are supported with the current CPU.
 *
 * @return true if the AVX feature flag reported by cpuid is set, false otherwise
 */
bool isVec8Supported() {
    // Make sure the CPU supports AVX.
    int registers[4];
    cpuid(registers, 0);
    // NOTE(review): the first word of query 0 is presumably the highest query
    // index the CPU accepts; query 1 is only issued when it is at least 1.
    if (registers[0] < 1)
        return false;
    cpuid(registers, 1);
    const int avxFlag = (int) 1 << 28;
    return (registers[2] & avxFlag) != 0;
}
#else
/**
 * This file was compiled without AVX enabled, so 8 component vectors are
 * reported as unsupported without querying the CPU.
 */
bool isVec8Supported() {
    return false;
}
#endif
using namespace OpenMM;
using namespace std;
/**
 * Compare the four components of a vector (read with operator[]) against the
 * expected values. If any component differs from its expected value by more
 * than 1e-6, throwException() is called with a message listing the expected
 * and found components.
 */
#define ASSERT_VEC4_EQUAL(found, expected0, expected1, expected2, expected3) { \
    if (std::abs((found)[0]-(expected0))>1e-6 || \
        std::abs((found)[1]-(expected1))>1e-6 || \
        std::abs((found)[2]-(expected2))>1e-6 || \
        std::abs((found)[3]-(expected3))>1e-6) { \
        std::stringstream details; \
        details << " Expected (" \
                << (expected0) << "," << (expected1) << "," << (expected2) << "," << (expected3) \
                << "), found (" \
                << (found)[0] << "," << (found)[1] << "," << (found)[2] << "," << (found)[3] \
                << ")"; \
        throwException(__FILE__, __LINE__, details.str()); \
    } \
};
/**
 * Compare the eight components of a vector against the expected values. The
 * first four components are read from lowerVec() and the last four from
 * upperVec(). If any component differs from its expected value by more than
 * 1e-6, throwException() is called with a message listing the expected and
 * found components.
 */
#define ASSERT_VEC8_EQUAL(found, expected0, expected1, expected2, expected3, expected4, expected5, expected6, expected7) { \
    if (std::abs((found).lowerVec()[0]-(expected0))>1e-6 || \
        std::abs((found).lowerVec()[1]-(expected1))>1e-6 || \
        std::abs((found).lowerVec()[2]-(expected2))>1e-6 || \
        std::abs((found).lowerVec()[3]-(expected3))>1e-6 || \
        std::abs((found).upperVec()[0]-(expected4))>1e-6 || \
        std::abs((found).upperVec()[1]-(expected5))>1e-6 || \
        std::abs((found).upperVec()[2]-(expected6))>1e-6 || \
        std::abs((found).upperVec()[3]-(expected7))>1e-6) { \
        std::stringstream details; \
        details << " Expected (" \
                << (expected0) << "," << (expected1) << "," << (expected2) << "," << (expected3) << "," \
                << (expected4) << "," << (expected5) << "," << (expected6) << "," << (expected7) \
                << "), found (" \
                << (found).lowerVec()[0] << "," << (found).lowerVec()[1] << "," \
                << (found).lowerVec()[2] << "," << (found).lowerVec()[3] << "," \
                << (found).upperVec()[0] << "," << (found).upperVec()[1] << "," \
                << (found).upperVec()[2] << "," << (found).upperVec()[3] \
                << ")"; \
        throwException(__FILE__, __LINE__, details.str()); \
    } \
};
/**
 * Compare the eight components of a vector against the expected values using
 * exact (!=) comparison. The first four components are read from lowerVec()
 * and the last four from upperVec(). On any mismatch, throwException() is
 * called with a message listing the expected and found components.
 */
#define ASSERT_VEC8_EQUAL_INT(found, expected0, expected1, expected2, expected3, expected4, expected5, expected6, expected7) { \
    if ((found).lowerVec()[0] != (expected0) || \
        (found).lowerVec()[1] != (expected1) || \
        (found).lowerVec()[2] != (expected2) || \
        (found).lowerVec()[3] != (expected3) || \
        (found).upperVec()[0] != (expected4) || \
        (found).upperVec()[1] != (expected5) || \
        (found).upperVec()[2] != (expected6) || \
        (found).upperVec()[3] != (expected7)) { \
        std::stringstream details; \
        details << " Expected (" \
                << (expected0) << "," << (expected1) << "," << (expected2) << "," << (expected3) << "," \
                << (expected4) << "," << (expected5) << "," << (expected6) << "," << (expected7) \
                << "), found (" \
                << (found).lowerVec()[0] << "," << (found).lowerVec()[1] << "," \
                << (found).lowerVec()[2] << "," << (found).lowerVec()[3] << "," \
                << (found).upperVec()[0] << "," << (found).upperVec()[1] << "," \
                << (found).upperVec()[2] << "," << (found).upperVec()[3] \
                << ")"; \
        throwException(__FILE__, __LINE__, details.str()); \
    } \
};
void testLoadStore() {
fvec8 f1(2.0);
ivec8 i1(3);
ASSERT_VEC8_EQUAL(f1, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0);
ASSERT_VEC8_EQUAL_INT(i1, 3, 3, 3, 3, 3, 3, 3, 3);
fvec8 f2(2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0);
ivec8 i2(2, 3, 4, 5, 6, 7, 8, 9);
ASSERT_VEC8_EQUAL(f2, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0);
ASSERT_VEC8_EQUAL_INT(i2, 2, 3, 4, 5, 6, 7, 8, 9);
float farray[8];
int iarray[8];
f2.store(farray);
i2.store(iarray);
fvec8 f3(farray);
ivec8 i3(iarray);
ASSERT_VEC8_EQUAL(f3, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0);
ASSERT_VEC8_EQUAL_INT(i3, 2, 3, 4, 5, 6, 7, 8, 9);
ASSERT_EQUAL(f3.lowerVec()[0], 2.5);
ASSERT_EQUAL(f3.lowerVec()[1], 3.0);
ASSERT_EQUAL(f3.lowerVec()[2], 3.5);
ASSERT_EQUAL(f3.lowerVec()[3], 4.0);
ASSERT_EQUAL(f3.upperVec()[0], 4.5);
ASSERT_EQUAL(f3.upperVec()[1], 5.0);
ASSERT_EQUAL(f3.upperVec()[2], 5.5);
ASSERT_EQUAL(f3.upperVec()[3], 6.0);
ASSERT_EQUAL(i3.lowerVec()[0], 2);
ASSERT_EQUAL(i3.lowerVec()[1], 3);
ASSERT_EQUAL(i3.lowerVec()[2], 4);
ASSERT_EQUAL(i3.lowerVec()[3], 5);
ASSERT_EQUAL(i3.upperVec()[0], 6);
ASSERT_EQUAL(i3.upperVec()[1], 7);
ASSERT_EQUAL(i3.upperVec()[2], 8);
ASSERT_EQUAL(i3.upperVec()[3], 9);
}
void testArithmetic() {
fvec8 f1(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0);
ASSERT_VEC8_EQUAL(f1+fvec8(1, 2, 3, 4, 5, 6, 7, 8), 1.5, 3. , 4.5, 6. , 7.5, 9. , 10.5, 12.);
ASSERT_VEC8_EQUAL(f1-fvec8(1, 2, 3, 4, 5, 6, 7, 8), -0.5, -1. , -1.5, -2. , -2.5, -3. , -3.5, -4.);
ASSERT_VEC8_EQUAL(f1*fvec8(1, 2, 3, 4, 5, 6, 7, 8), 0.5, 2. , 4.5, 8. , 12.5, 18. , 24.5, 32.);
ASSERT_VEC8_EQUAL(f1/fvec8(1, 2, 3, 4, 5, 6, 7, 8), 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5);
f1 = fvec8(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0);
f1 += fvec8(1, 2, 3, 4, 5, 6, 7, 8);
ASSERT_VEC8_EQUAL(f1, 1.5, 3. , 4.5, 6. , 7.5, 9. , 10.5, 12.);
f1 = fvec8(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0);
f1 -= fvec8(1, 2, 3, 4, 5, 6, 7, 8);
ASSERT_VEC8_EQUAL(f1, -0.5, -1. , -1.5, -2. , -2.5, -3. , -3.5, -4.);
f1 = fvec8(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0);
f1 *= fvec8(1, 2, 3, 4, 5, 6, 7, 8);
ASSERT_VEC8_EQUAL(f1, 0.5, 2. , 4.5, 8. , 12.5, 18. , 24.5, 32.);
f1 = fvec8(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0);
f1 /= fvec8(1, 2, 3, 4, 5, 6, 7, 8);
ASSERT_VEC8_EQUAL(f1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5);
}
/**
 * Test the bitwise & and | operators of fvec8 and ivec8, using masks whose
 * components are either all zero bits or all one bits.
 */
void testLogic() {
    int allBits = -1;
    // Copy the bit pattern of allBits into a float. memcpy is used instead of
    // casting the pointer, because reading an int through a float lvalue
    // violates the aliasing rules and is undefined behavior.
    float allBitsf;
    std::memcpy(&allBitsf, &allBits, sizeof(allBitsf));
    ivec8 mask(0, allBits, allBits, 0, 0, allBits, allBits, 0);
    fvec8 fmask(0, allBitsf, allBitsf, 0, 0, allBitsf, allBitsf, 0);
    fvec8 f1(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0);
    ivec8 i1(1, 2, 3, 4, 5, 6, 7, 8);

    // AND keeps the components where the mask is set and zeroes the others.
    ASSERT_VEC8_EQUAL(f1&fmask, 0, 1.0, 1.5, 0, 0, 3.0, 3.5, 0.0);

    // OR leaves unmasked components alone. The masked ones cannot be compared
    // by value, so they are checked by testing that they are not equal to themselves.
    fvec8 temp = f1|fmask;
    ASSERT_EQUAL(0.5, temp.lowerVec()[0]);
    ASSERT(temp.lowerVec()[1] != temp.lowerVec()[1]); // All bits set, which is nan
    ASSERT(temp.lowerVec()[2] != temp.lowerVec()[2]); // All bits set, which is nan
    ASSERT_EQUAL(2.0, temp.lowerVec()[3]);
    ASSERT_EQUAL(2.5, temp.upperVec()[0]);
    ASSERT(temp.upperVec()[1] != temp.upperVec()[1]); // All bits set, which is nan
    ASSERT(temp.upperVec()[2] != temp.upperVec()[2]); // All bits set, which is nan
    ASSERT_EQUAL(4.0, temp.upperVec()[3]);

    // The same two operators on integer vectors.
    ASSERT_VEC8_EQUAL_INT(i1&mask, 0, 2, 3, 0, 0, 6, 7, 0);
    ASSERT_VEC8_EQUAL_INT(i1|mask, 1, allBits, allBits, 4, 5, allBits, allBits, 8);
}
void testComparisons() {
fvec8 v1(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
fvec8 v2(1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5);
ASSERT_VEC8_EQUAL(blend(v1, v2,
fvec8(1.0, 1.5, 3.0, 2.2, 10.0, 10.5, 13.0, 12.2)==fvec8(1.1, 1.5, 3.0, 2.1, 10.1, 10.5, 13.0, 12.1)),
0.0, 1.5, 1.5, 0.0, 0.0, 1.5, 1.5, 0.0);
ASSERT_VEC8_EQUAL(blend(v1, v2,
fvec8(1.0, 1.5, 3.0, 2.2, 10.0, 10.5, 13.0, 12.2)!=fvec8(1.1, 1.5, 3.0, 2.1, 10.1, 10.5, 13.0, 12.1)),
1.5, 0.0, 0.0, 1.5, 1.5, 0.0, 0.0, 1.5);
ASSERT_VEC8_EQUAL(blend(v1, v2,
fvec8(1.0, 1.5, 3.0, 2.2, 10.0, 10.5, 13.0, 12.2)<fvec8(1.1, 1.5, 3.0, 2.1, 10.1, 10.5, 13.0, 12.1)),
1.5, 0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 0.0);
ASSERT_VEC8_EQUAL(blend(v1, v2,
fvec8(1.0, 1.5, 3.0, 2.2, 10.0, 10.5, 13.0, 12.2)>fvec8(1.1, 1.5, 3.0, 2.1, 10.1, 10.5, 13.0, 12.1)),
0.0, 0.0, 0.0, 1.5, 0.0, 0.0, 0.0, 1.5);
ASSERT_VEC8_EQUAL(blend(v1, v2,
fvec8(1.0, 1.5, 3.0, 2.2, 10.0, 10.5, 13.0, 12.2)<=fvec8(1.1, 1.5, 3.0, 2.1, 10.1, 10.5, 13.0, 12.1)),
1.5, 1.5, 1.5, 0.0, 1.5, 1.5, 1.5, 0.0);
ASSERT_VEC8_EQUAL(blend(v1, v2,
fvec8(1.0, 1.5, 3.0, 2.2, 10.0, 10.5, 13.0, 12.2)>=fvec8(1.1, 1.5, 3.0, 2.1, 10.1, 10.5, 13.0, 12.1)),
0.0, 1.5, 1.5, 1.5, 0.0, 1.5, 1.5, 1.5);
}
void testMathFunctions() {
fvec8 f1(0.4, 1.9, -1.2, -3.8, 0.4, 1.9, -1.2, -3.8);
fvec8 f2(1.1, 1.2, 1.3, -5.0, 1.1, 1.2, 1.3, -5.0);
ASSERT_VEC8_EQUAL(floor(f1), 0.0, 1.0, -2.0, -4.0, 0.0, 1.0, -2.0, -4.0);
ASSERT_VEC8_EQUAL(ceil(f1), 1.0, 2.0, -1.0, -3.0, 1.0, 2.0, -1.0, -3.0);
ASSERT_VEC8_EQUAL(round(f1), 0.0, 2.0, -1.0, -4.0, 0.0, 2.0, -1.0, -4.0);
ASSERT_VEC8_EQUAL(abs(f1), 0.4, 1.9, 1.2, 3.8, 0.4, 1.9, 1.2, 3.8);
ASSERT_VEC8_EQUAL(min(f1, f2), 0.4, 1.2, -1.2, -5.0, 0.4, 1.2, -1.2, -5.0);
ASSERT_VEC8_EQUAL(max(f1, f2), 1.1, 1.9, 1.3, -3.8, 1.1, 1.9, 1.3, -3.8);
ASSERT_VEC8_EQUAL(sqrt(fvec8(1.5, 3.1, 4.0, 15.0, 1.5, 3.1, 4.0, 15.0)), sqrt(1.5), sqrt(3.1), sqrt(4.0), sqrt(15.0), sqrt(1.5), sqrt(3.1), sqrt(4.0), sqrt(15.0));
ASSERT_VEC8_EQUAL(rsqrt(fvec8(1.5, 3.1, 4.0, 15.0, 1.5, 3.1, 4.0, 15.0)), 1.0/sqrt(1.5), 1.0/sqrt(3.1), 1.0/sqrt(4.0), 1.0/sqrt(15.0), 1.0/sqrt(1.5), 1.0/sqrt(3.1), 1.0/sqrt(4.0), 1.0/sqrt(15.0));
ASSERT_EQUAL_TOL(f1.lowerVec()[0]*f2.lowerVec()[0]+f1.lowerVec()[1]*f2.lowerVec()[1]+f1.lowerVec()[2]*f2.lowerVec()[2]+f1.lowerVec()[3]*f2.lowerVec()[3]+f1.upperVec()[0]*f2.upperVec()[0]+f1.upperVec()[1]*f2.upperVec()[1]+f1.upperVec()[2]*f2.upperVec()[2]+f1.upperVec()[3]*f2.upperVec()[3], dot8(f1, f2), 1e-6);
ASSERT(any(f1 > 0.5));
ASSERT(!any(f1 > 2.0));
ASSERT_VEC8_EQUAL(blend(f1, f2, ivec8(-1, 0, -1, 0, -1, 0, -1, 0)), 1.1, 1.9, 1.3, -3.8, 1.1, 1.9, 1.3, -3.8);
}
void testTranspose() {
fvec4 f1(0.0, 1.0, 2.0, 3.0);
fvec4 f2(10.0, 11.0, 12.0, 13.0);
fvec4 f3(20.0, 21.0, 22.0, 23.0);
fvec4 f4(30.0, 31.0, 32.0, 33.0);
fvec4 f5(40.0, 41.0, 42.0, 43.0);
fvec4 f6(50.0, 51.0, 52.0, 53.0);
fvec4 f7(60.0, 61.0, 62.0, 63.0);
fvec4 f8(70.0, 71.0, 72.0, 73.0);
fvec8 o1, o2, o3, o4;
transpose(f1, f2, f3, f4, f5, f6, f7, f8, o1, o2, o3, o4);
ASSERT_VEC8_EQUAL(o1, 0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0);
ASSERT_VEC8_EQUAL(o2, 1.0, 11.0, 21.0, 31.0, 41.0, 51.0, 61.0, 71.0);
ASSERT_VEC8_EQUAL(o3, 2.0, 12.0, 22.0, 32.0, 42.0, 52.0, 62.0, 72.0);
ASSERT_VEC8_EQUAL(o4, 3.0, 13.0, 23.0, 33.0, 43.0, 53.0, 63.0, 73.0);
fvec4 g1, g2, g3, g4, g5, g6, g7, g8;
transpose(o1, o2, o3, o4, g1, g2, g3, g4, g5, g6, g7, g8);
ASSERT_VEC4_EQUAL(g1, 0.0, 1.0, 2.0, 3.0);
ASSERT_VEC4_EQUAL(g2, 10.0, 11.0, 12.0, 13.0);
ASSERT_VEC4_EQUAL(g3, 20.0, 21.0, 22.0, 23.0);
ASSERT_VEC4_EQUAL(g4, 30.0, 31.0, 32.0, 33.0);
ASSERT_VEC4_EQUAL(g5, 40.0, 41.0, 42.0, 43.0);
ASSERT_VEC4_EQUAL(g6, 50.0, 51.0, 52.0, 53.0);
ASSERT_VEC4_EQUAL(g7, 60.0, 61.0, 62.0, 63.0);
ASSERT_VEC4_EQUAL(g8, 70.0, 71.0, 72.0, 73.0);
}
int main(int argc, char* argv[]) {
try {
if (!isVec8Supported()) {
cout << "CPU is not supported. Exiting." << endl;
return 0;
}
testLoadStore();
testArithmetic();
testLogic();
testComparisons();
testMathFunctions();
testTranspose();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
__author__ = "Peter Eastman" """
__version__ = "1.0" setup.py: Used for building python wrappers for Simbios' OpenMM library.
"""
import ast import ast
import re import re
import os import os
...@@ -13,6 +13,8 @@ MINOR_VERSION_NUM='@OPENMM_MINOR_VERSION@' ...@@ -13,6 +13,8 @@ MINOR_VERSION_NUM='@OPENMM_MINOR_VERSION@'
BUILD_INFO='@OPENMM_BUILD_VERSION@' BUILD_INFO='@OPENMM_BUILD_VERSION@'
IS_RELEASED = False IS_RELEASED = False
__author__ = "Peter Eastman"
__version__ = "%s.%s" % (MAJOR_VERSION_NUM, MINOR_VERSION_NUM)
def reportError(message): def reportError(message):
sys.stdout.write("ERROR: ") sys.stdout.write("ERROR: ")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment