Commit 208d5240 authored by ChayaSt's avatar ChayaSt
Browse files

Merge branch 'master' of https://github.com/pandegroup/openmm into nbfix

parents 79e76a4e 20af24c4
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2015 Stanford University and the Authors. *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -170,6 +170,7 @@ public:
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
class GetPositionsTask;
OpenCLContext& cl;
};
......
......@@ -29,6 +29,7 @@
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
#include "windowsExportOpenCL.h"
namespace OpenMM {
......@@ -107,7 +108,7 @@ public:
class OPENMM_EXPORT_OPENCL OpenCLPlatform::PlatformData {
public:
PlatformData(const System& system, const std::string& platformPropValue, const std::string& deviceIndexProperty, const std::string& precisionProperty,
const std::string& cpuPmeProperty, const std::string& pmeStreamProperty);
const std::string& cpuPmeProperty, const std::string& pmeStreamProperty, int numThreads);
~PlatformData();
void initializeContexts(const System& system);
void syncContexts();
......@@ -119,6 +120,7 @@ public:
int stepCount, computeForceCount;
double time;
std::map<std::string, std::string> propertyValues;
ThreadPool threads;
};
} // namespace OpenMM
......
......@@ -343,7 +343,7 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
break;
}
case Operation::POWER:
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
out << "pow((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
break;
case Operation::NEGATE:
out << "-" << getTempName(node.getChildren()[0], temps);
......@@ -480,14 +480,14 @@ void OpenCLExpressionUtilities::processExpression(stringstream& out, const Expre
out << "}";
}
else
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << context.doubleToString(exponent) << ")";
out << "pow((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << context.doubleToString(exponent) << ")";
break;
}
case Operation::MIN:
out << "min(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
out << "min((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
break;
case Operation::MAX:
out << "max(" << getTempName(node.getChildren()[0], temps) << ", " << getTempName(node.getChildren()[1], temps) << ")";
out << "max((" << tempType << ") " << getTempName(node.getChildren()[0], temps) << ", (" << tempType << ") " << getTempName(node.getChildren()[1], temps) << ")";
break;
case Operation::ABS:
out << "fabs(" << getTempName(node.getChildren()[0], temps) << ")";
......
......@@ -165,42 +165,75 @@ void OpenCLUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts[i]->setTime(time);
}
class OpenCLUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
GetPositionsTask(OpenCLContext& cl, vector<Vec3>& positions, vector<mm_float4>& posCorrection) : cl(cl), positions(positions), posCorrection(posCorrection) {
}
void execute(ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector<int>& order = cl.getAtomIndex();
int numParticles = cl.getNumAtoms();
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
int numThreads = threads.getNumThreads();
int start = threadIndex*numParticles/numThreads;
int end = (threadIndex+1)*numParticles/numThreads;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
for (int i = start; i < end; ++i) {
mm_double4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = start; i < end; ++i) {
mm_float4 pos1 = posq[i];
mm_float4 pos2 = posCorrection[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3((double)pos1.x+(double)pos2.x, (double)pos1.y+(double)pos2.y, (double)pos1.z+(double)pos2.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
for (int i = start; i < end; ++i) {
mm_float4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
}
OpenCLContext& cl;
vector<Vec3>& positions;
vector<mm_float4>& posCorrection;
};
void OpenCLUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
const vector<cl_int>& order = cl.getAtomIndex();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
Vec3 boxVectors[3];
cl.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
vector<mm_float4> posCorrection;
if (cl.getUseDoublePrecision()) {
mm_double4* posq = (mm_double4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
mm_double4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else if (cl.getUseMixedPrecision()) {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
vector<mm_float4> posCorrection;
cl.getPosq().download(posq);
cl.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cl.getPosqCorrection().download(posCorrection);
for (int i = 0; i < numParticles; ++i) {
mm_float4 pos1 = posq[i];
mm_float4 pos2 = posCorrection[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3((double)pos1.x+(double)pos2.x, (double)pos1.y+(double)pos2.y, (double)pos1.z+(double)pos2.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
else {
mm_float4* posq = (mm_float4*) cl.getPinnedBuffer();
cl.getPosq().download(posq);
for (int i = 0; i < numParticles; ++i) {
mm_float4 pos = posq[i];
mm_int4 offset = cl.getPosCellOffsets()[i];
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask task(cl, positions, posCorrection);
cl.getPlatformData().threads.execute(task);
cl.getPlatformData().threads.waitForThreads();
}
void OpenCLUpdateStateDataKernel::setPositions(ContextImpl& context, const vector<Vec3>& positions) {
......@@ -6906,12 +6939,12 @@ void OpenCLIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegr
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double value;
summedValue->download(&value);
globalValuesDouble[stepTarget[step].variableIndex] = value;
recordGlobalValue(value, stepTarget[step]);
}
else {
float value;
summedValue->download(&value);
globalValuesDouble[stepTarget[step].variableIndex] = value;
recordGlobalValue(value, stepTarget[step]);
}
}
else if (stepType[step] == CustomIntegrator::UpdateContextState) {
......@@ -7020,6 +7053,7 @@ void OpenCLIntegrateCustomStepKernel::recordGlobalValue(double value, GlobalTarg
case DT:
if (value != globalValuesDouble[dtVariableIndex])
deviceGlobalsAreCurrent = false;
expressionSet.setVariable(dtVariableIndex, value);
globalValuesDouble[dtVariableIndex] = value;
cl.getIntegrationUtilities().setNextStepSize(value);
break;
......
......@@ -28,9 +28,10 @@
#include "OpenCLPlatform.h"
#include "OpenCLKernelFactory.h"
#include "OpenCLKernels.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/Context.h"
#include "openmm/System.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <cctype>
#include <sstream>
......@@ -165,7 +166,11 @@ void OpenCLPlatform::contextCreated(ContextImpl& context, const map<string, stri
pmeKernelName.push_back(CalcPmeReciprocalForceKernel::Name());
if (!supportsKernels(pmeKernelName))
cpuPmePropValue = "false";
context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue, pmeStreamPropValue));
int threads = getNumProcessors();
char* threadsEnv = getenv("OPENMM_CPU_THREADS");
if (threadsEnv != NULL)
stringstream(threadsEnv) >> threads;
context.setPlatformData(new PlatformData(context.getSystem(), platformPropValue, devicePropValue, precisionPropValue, cpuPmePropValue, pmeStreamPropValue, threads));
}
void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
......@@ -174,7 +179,8 @@ void OpenCLPlatform::contextDestroyed(ContextImpl& context) const {
}
OpenCLPlatform::PlatformData::PlatformData(const System& system, const string& platformPropValue, const string& deviceIndexProperty,
const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty) : removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false) {
const string& precisionProperty, const string& cpuPmeProperty, const string& pmeStreamProperty, int numThreads) :
removeCM(false), stepCount(0), computeForceCount(0), time(0.0), hasInitializedContexts(false), threads(numThreads) {
int platformIndex = -1;
if (platformPropValue.length() > 0)
stringstream(platformPropValue) >> platformIndex;
......
......@@ -55,7 +55,7 @@ inline real4 computeCross(real4 vec1, real4 vec2) {
/**
* Determine whether a particular interaction is in the list of exclusions.
*/
inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict exclusions, __global int* restrict exclusionStartIndex) {
inline bool isInteractionExcluded(int atom1, int atom2, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex) {
int first = exclusionStartIndex[atom1];
int last = exclusionStartIndex[atom1+1];
for (int i = last-1; i >= first; i--) {
......@@ -174,7 +174,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
__global const real4* restrict posq, __global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs,
__global int* restrict numNeighborPairs, __global int* restrict numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS
, __global int* restrict exclusions, __global int* restrict exclusionStartIndex
, __global const int* restrict exclusions, __global const int* restrict exclusionStartIndex
#endif
) {
__local real4 positionCache[FIND_NEIGHBORS_WORKGROUP_SIZE];
......@@ -264,7 +264,9 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, rea
}
}
}
numNeighborsForAtom[atom1] = totalNeighborsForAtom1;
if (atom1 < NUM_ATOMS)
numNeighborsForAtom[atom1] = totalNeighborsForAtom1;
SYNC_WARPS;
}
}
......@@ -307,6 +309,7 @@ __kernel void computeNeighborStartIndices(__global int* restrict numNeighborsFor
numNeighborsForAtom[globalIndex] = 0; // Clear this so the next kernel can use it as a counter
}
globalOffset += posBuffer[get_local_size(0)-1];
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0)
neighborStartIndex[0] = 0;
......
......@@ -54,7 +54,7 @@ template <class Real2>
void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
System system;
system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false");
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
OpenMM_SFMT::SFMT sfmt;
......
......@@ -54,7 +54,7 @@ void testGaussian() {
System system;
for (int i = 0; i < numAtoms; i++)
system.addParticle(1.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false");
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
context.getIntegrationUtilities().initRandomNumberGenerator(0);
......
......@@ -64,7 +64,7 @@ void verifySorting(vector<float> array) {
System system;
system.addParticle(0.0);
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false");
OpenCLPlatform::PlatformData platformData(system, "", "", platform.getPropertyDefaultValue("OpenCLPrecision"), "false", "false", 1);
OpenCLContext& context = *platformData.contexts[0];
context.initialize();
OpenCLArray data(context, array.size(), sizeof(float), "sortData");
......
......@@ -32,7 +32,7 @@ if (k1 != 0) {
// Compute the second anisotropic force.
if (k2 != 0) {
real3 dir = make_real3(pos3.x-pos4.x, pos3.y-pos4.y, pos3.z-pos4.z);
real3 dir = make_real3(pos4.x-pos5.x, pos4.y-pos5.y, pos4.z-pos5.z);
real invDist = RSQRT(dot(dir, dir));
dir *= invDist;
real rprime = dot(dir, delta);
......
......@@ -32,7 +32,7 @@ if (k1 != 0) {
// Compute the second anisotropic force.
if (k2 != 0) {
real4 dir = (real4) (pos3.xyz-pos4.xyz, 0);
real4 dir = (real4) (pos4.xyz-pos5.xyz, 0);
real invDist = RSQRT(dot(dir, dir));
dir *= invDist;
real rprime = dot(dir, delta);
......
......@@ -391,18 +391,21 @@ void testSum() {
}
CustomIntegrator integrator(0.005);
integrator.addGlobalVariable("ke", 0);
integrator.addGlobalVariable("temp", 0);
integrator.addComputePerDof("v", "v+dt*f/m");
integrator.addComputePerDof("x", "x+dt*v");
integrator.addComputeSum("ke", "m*v*v/2");
integrator.addComputeGlobal("temp", "ke+dt");
Context context(system, integrator, platform);
context.setPositions(positions);
// See if the sum is being computed correctly.
for (int i = 0; i < 100; ++i) {
integrator.step(1);
State state = context.getState(State::Energy);
ASSERT_EQUAL_TOL(state.getKineticEnergy(), integrator.getGlobalVariable(0), 1e-5);
integrator.step(1);
ASSERT_EQUAL_TOL(integrator.getGlobalVariable(0)+integrator.getStepSize(), integrator.getGlobalVariable(1), 1e-5);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment