Commit 1f0ec7b5 authored by Peter Eastman
Browse files

Continuing to implement new CUDA platform

parent 99cebd08
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "CudaNonbondedUtilities.h"
#include <iostream>
using namespace OpenMM;
using namespace std;
/**
 * Create a CudaBondedUtilities attached to a context.
 *
 * Nothing is compiled here. The kernel handle is explicitly nulled because initialize()
 * returns without assigning it when no interactions have been registered; without this it
 * would be left holding an indeterminate value.
 *
 * @param context    the context this object generates and compiles kernels for
 */
CudaBondedUtilities::CudaBondedUtilities(CudaContext& context) : context(context), kernel(NULL), numForceBuffers(0), maxBonds(0), hasInitializedKernels(false) {
}
/**
 * Release every index array that initialize() allocated.
 */
CudaBondedUtilities::~CudaBondedUtilities() {
    typedef vector<vector<CudaArray*> >::iterator ForceIter;
    typedef vector<CudaArray*>::iterator ArrayIter;
    for (ForceIter perForce = atomIndices.begin(); perForce != atomIndices.end(); ++perForce) {
        for (ArrayIter array = perForce->begin(); array != perForce->end(); ++array)
            delete *array;
    }
}
/**
 * Register a bonded interaction to be folded into the generated kernel.
 *
 * @param atoms     one entry per bond, each listing the atoms in that bond
 * @param source    the code that evaluates a single bond
 * @param group     the force group the interaction belongs to
 */
void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) {
    // An interaction without any bonds is not recorded at all.
    if (atoms.empty())
        return;
    forceAtoms.push_back(atoms);
    forceSource.push_back(source);
    forceGroup.push_back(group);
}
/**
 * Register a block of device memory to be passed to the generated kernel.
 *
 * @param data    the device memory to pass
 * @param type    the element type stored in that memory
 * @return the name the kernel argument will have; names are numbered from 1 in
 *         registration order ("customArg1", "customArg2", ...)
 */
std::string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
    arguments.push_back(data);
    argTypes.push_back(type);
    // The count after insertion doubles as the 1-based number of the new argument.
    const vector<CUdeviceptr>::size_type argNumber = arguments.size();
    string name("customArg");
    name += context.intToString(argNumber);
    return name;
}
/**
 * Queue a piece of source code to be emitted ahead of the generated kernel.
 *
 * @param source    the code to include
 */
void CudaBondedUtilities::addPrefixCode(const string& source) {
    // Order is preserved: initialize() writes the fragments out in the order they were added.
    prefixCode.insert(prefixCode.end(), source);
}
void CudaBondedUtilities::initialize(const System& system) {
int numForces = forceAtoms.size();
if (numForces == 0)
return;
// Build the lists of atom indices.
atomIndices.resize(numForces);
for (int i = 0; i < numForces; i++) {
int numBonds = forceAtoms[i].size();
int numAtoms = forceAtoms[i][0].size();
int startAtom = 0;
while (startAtom < numAtoms) {
int width = max(numAtoms-startAtom, 4);
if (width == 3)
width = 4;
vector<unsigned int> indexVec(width*numBonds);
for (int bond = 0; bond < numBonds; bond++) {
for (int atom = 0; atom < width; atom++)
indexVec[bond*width+atom] = forceAtoms[i][bond][startAtom+atom];
}
CudaArray* indices = CudaArray::create<unsigned int>(indexVec.size(), "bondedIndices");
indices->upload(indexVec);
atomIndices[i].push_back(indices);
startAtom += width;
}
}
// Create the kernel.
stringstream s;
for (int i = 0; i < (int) prefixCode.size(); i++)
s<<prefixCode[i];
s<<"extern \"C\" __global__ void computeBondedForces(long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups";
for (int force = 0; force < numForces; force++) {
for (int i = 0; i < (int) atomIndices[force].size(); i++) {
int indexWidth = atomIndices[force][i]->getElementSize()/4;
string indexType = "unsigned int"+(indexWidth == 1 ? "" : context.intToString(indexWidth));
s<<", const "<<indexType<<"* __restrict__ atomIndices"<<force<<"_"<<i;
}
}
for (int i = 0; i < (int) arguments.size(); i++)
s<<", "<<argTypes[i]<<"* customArg"<<(i+1);
s<<") {\n";
s<<"real energy = 0;\n";
for (int force = 0; force < numForces; force++)
s<<createForceSource(force, forceAtoms[force].size(), forceAtoms[force][0].size(), forceGroup[force], forceSource[force]);
s<<"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;\n";
s<<"}\n";
map<string, string> defines;
defines["PADDED_NUM_ATOMS"] = context.intToString(context.getPaddedNumAtoms());
CUmodule module = context.createModule(s.str(), defines);
kernel = context.getKernel(module, "computeBondedForces");
forceAtoms.clear();
forceSource.clear();
}
/**
 * Generate the fragment of kernel source that evaluates one registered interaction.
 *
 * The fragment loops over the bonds of the interaction, loads the atom indices and
 * positions into the variables atom1, atom2, ... and pos1, pos2, ..., inserts the
 * caller-supplied code, and then accumulates force1, force2, ... into the force buffer.
 *
 * As a side effect, maxBonds is raised to numBonds if it was smaller.
 *
 * @param forceIndex      index of the interaction; selects its atomIndices arrays
 * @param numBonds        number of bonds in the interaction
 * @param numAtoms        number of atoms in each bond
 * @param group           the force group; the fragment only runs if this bit is set in "groups"
 * @param computeForce    the code that evaluates a single bond
 * @return the generated source
 */
string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int numAtoms, int group, const string& computeForce) {
    maxBonds = max(maxBonds, numBonds);
    const string suffix1[] = {""};
    const string suffix4[] = {".x", ".y", ".z", ".w"};
    stringstream s;
    s<<"if ((groups&"<<(1<<group)<<") != 0)\n";
    s<<"for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < "<<numBonds<<"; index += blockDim.x*gridDim.x) {\n";
    int startAtom = 0;
    for (int i = 0; i < (int) atomIndices[forceIndex].size(); i++) {
        int indexWidth = atomIndices[forceIndex][i]->getElementSize()/4;
        const string* suffix = (indexWidth == 1 ? suffix1 : suffix4);
        // CUDA's vector types are named uint2/uint3/uint4; "unsigned int4" is not a type.
        string indexType = (indexWidth == 1 ? string("unsigned int") : "uint"+context.intToString(indexWidth));
        s<<" "<<indexType<<" atoms"<<i<<" = atomIndices"<<forceIndex<<"_"<<i<<"[index];\n";
        // No "bufferIndices" load is emitted: the kernel signature declares no such argument.
        // Entries at or beyond numAtoms are padding and are not turned into atoms.
        for (int j = 0; j < indexWidth && startAtom+j < numAtoms; j++) {
            // Atoms and positions are numbered across all index arrays of the bond, so the
            // position must use the same running number as the atom it belongs to.
            int atomNumber = startAtom+j+1;
            s<<" unsigned int atom"<<atomNumber<<" = atoms"<<i<<suffix[j]<<";\n";
            s<<" real4 pos"<<atomNumber<<" = posq[atom"<<atomNumber<<"];\n";
        }
        startAtom += indexWidth;
    }
    s<<computeForce<<"\n";
    // The x, y and z components of each atom's force go to three consecutive
    // PADDED_NUM_ATOMS-sized sections of the force buffer.
    // NOTE(review): forceBuffer is declared as long* by the kernel signature; confirm that
    // atomicAdd provides an overload for that type on every target compiler.
    for (int i = 0; i < numAtoms; i++) {
        s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"], (long) (force"<<(i+1)<<".x*0xFFFFFFFF));\n";
        s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], (long) (force"<<(i+1)<<".y*0xFFFFFFFF));\n";
        s<<" atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], (long) (force"<<(i+1)<<".z*0xFFFFFFFF));\n";
    }
    s<<"}\n";
    return s.str();
}
/**
 * Compute the bonded interactions.
 *
 * This is currently a stub: the whole body is commented out, so calling it has no effect
 * and "groups" is ignored. The commented-out code is written against the OpenCL API
 * (cl::Kernel, cl::Buffer) and refers to members (forceSets, kernels, bufferIndices) that
 * this class does not declare.
 *
 * @param groups    a set of bit flags for which force groups to include
 */
void CudaBondedUtilities::computeInteractions(int groups) {
// if (!hasInitializedKernels) {
// hasInitializedKernels = true;
// for (int i = 0; i < (int) forceSets.size(); i++) {
// int index = 0;
// cl::Kernel& kernel = kernels[i];
// kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
// index++;
// for (int j = 0; j < (int) forceSets[i].size(); j++) {
// kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer());
// }
// for (int j = 0; j < (int) arguments.size(); j++)
// kernel.setArg<cl::Memory>(index++, *arguments[j]);
// }
// }
// for (int i = 0; i < (int) kernels.size(); i++) {
// kernels[i].setArg<cl_int>(3, groups);
// context.executeKernel(kernels[i], maxBonds);
// }
}
#ifndef OPENMM_CUDABONDEDUTILITIES_H_
#define OPENMM_CUDABONDEDUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include "CudaContext.h"
#include "openmm/System.h"
#include <string>
#include <vector>
namespace OpenMM {
/**
* This class provides a generic mechanism for evaluating bonded interactions. You write only
* the source code needed to compute one interaction, and this class takes care of creating
* and executing a complete kernel that loops over bonds, evaluates each one, and accumulates
* the resulting forces and energies. This offers two advantages. First, it simplifies the
* task of writing a new Force. Second, it allows multiple forces to be evaluated by a single
* kernel, which reduces overhead and improves performance.
*
* A "bonded interaction" means an interaction that affects a small, fixed set of particles.
* The interaction energy may depend on the positions of only those particles, and the list of
* particles forming a "bond" may not change with time. Examples of bonded interactions
* include HarmonicBondForce, HarmonicAngleForce, and PeriodicTorsionForce.
*
* To create a bonded interaction, call addInteraction(). You pass to it a block of source
* code for evaluating the interaction. The inputs and outputs for that source code are as
* follows:
*
* <ol>
* <li>The index of the bond being evaluated will have been stored in the unsigned int variable "index".</li>
* <li>The indices of the atoms forming that bond will have been stored in the unsigned int variables "atom1",
* "atom2", ....</li>
* <li>The positions of those atoms will have been stored in the real4 variables "pos1", "pos2", ....</li>
* <li>A real variable called "energy" will exist. Your code should add the potential energy of the
* bond to that variable.</li>
* <li>Your code should define real4 variables called "force1", "force2", ... that contain the force to
* apply to each atom.</li>
* </ol>
*
* As a simple example, the following source code would be used to implement a pairwise interaction of
* the form E=r^2:
*
* <tt><pre>
* real4 delta = pos2-pos1;
* energy += delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
* real4 force1 = 2.0f*delta;
* real4 force2 = -2.0f*delta;
* </pre></tt>
*
* Interactions will often depend on parameters or other data. Call addArgument() to provide the data
* to this class. It will be passed to the interaction kernel as an argument, and you can refer to it
* from your interaction code.
*/
class OPENMM_EXPORT CudaBondedUtilities {
public:
CudaBondedUtilities(CudaContext& context);
~CudaBondedUtilities();
/**
* Add a bonded interaction.
*
* @param atoms this should have one entry for each bond, and that entry should contain the list
* of atoms involved in the bond. Every entry must have the same number of atoms.
* @param source the code to evaluate the interaction
* @param group the force group in which the interaction should be calculated
*/
void addInteraction(const std::vector<std::vector<int> >& atoms, const std::string& source, int group);
/**
* Add an argument that should be passed to the interaction kernel.
*
* @param data the device memory containing the data to pass
* @param type the data type contained in the memory (e.g. "float4")
* @return the name that will be used for the argument. Any code you pass to addInteraction() should
* refer to it by this name.
*/
std::string addArgument(CUdeviceptr data, const std::string& type);
/**
* Add some Cuda code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
*
* @param source the code to include
*/
void addPrefixCode(const std::string& source);
/**
* Initialize this object in preparation for a simulation.
*/
void initialize(const System& system);
/**
* Compute the bonded interactions.
*
* @param groups a set of bit flags for which force groups to include
*/
void computeInteractions(int groups);
private:
/**
* Generate the portion of the kernel source that evaluates a single interaction.
*
* @param forceIndex the index of the interaction
* @param numBonds the number of bonds in the interaction
* @param numAtoms the number of atoms in each bond
* @param group the force group of the interaction
* @param computeForce the code that evaluates one bond
* @return the generated source code
*/
std::string createForceSource(int forceIndex, int numBonds, int numAtoms, int group, const std::string& computeForce);
// The context this object was created for.
CudaContext& context;
// The compiled "computeBondedForces" kernel. Assigned by initialize().
CUfunction kernel;
// forceAtoms[i][bond] lists the atoms in one bond of interaction i. Filled by
// addInteraction() and emptied by initialize().
std::vector<std::vector<std::vector<int> > > forceAtoms;
// NOTE(review): no use of this member is visible in the accompanying implementation -
// confirm whether it is still needed.
std::vector<std::vector<int> > indexWidth;
// The source code and force group of each interaction, parallel to forceAtoms.
std::vector<std::string> forceSource;
std::vector<int> forceGroup;
// The device memory and element type name of each argument registered with addArgument().
std::vector<CUdeviceptr> arguments;
std::vector<std::string> argTypes;
// The index arrays created by initialize() for each interaction. They are owned by this
// object and deleted by the destructor.
std::vector<std::vector<CudaArray*> > atomIndices;
// The code fragments registered with addPrefixCode(), in registration order.
std::vector<std::string> prefixCode;
// maxBonds is the largest bond count passed to createForceSource() so far. numForceBuffers
// is set to zero by the constructor.
int numForceBuffers, maxBonds;
// Set to false by the constructor.
bool hasInitializedKernels;
};
} // namespace OpenMM
#endif /*OPENMM_CUDABONDEDUTILITIES_H_*/
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#include "CudaArray.h" #include "CudaArray.h"
//#include "CudaBondedUtilities.h" //#include "CudaBondedUtilities.h"
#include "CudaForceInfo.h" #include "CudaForceInfo.h"
//#include "CudaIntegrationUtilities.h" #include "CudaIntegrationUtilities.h"
#include "CudaKernelSources.h" #include "CudaKernelSources.h"
//#include "CudaNonbondedUtilities.h" //#include "CudaNonbondedUtilities.h"
#include "hilbert.h" #include "hilbert.h"
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "openmm/Platform.h" #include "openmm/Platform.h"
#include "openmm/System.h" #include "openmm/System.h"
#include "openmm/VirtualSite.h" #include "openmm/VirtualSite.h"
#include "CudaExpressionUtilities.h"
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
...@@ -66,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false; ...@@ -66,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler, CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler), const string& tempDir, CudaPlatform::PlatformData& platformData) : system(system), compiler(compiler),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL), time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), contextIsValid(false), atomsWereReordered(false), pinnedBuffer(NULL), posq(NULL),
velm(NULL), /*forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL), velm(NULL), /*forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL),*/ integration(NULL), expression(NULL),
bonded(NULL), nonbonded(NULL),*/ thread(NULL) { /*bonded(NULL), nonbonded(NULL),*/ thread(NULL) {
if (!hasInitializedCuda) { if (!hasInitializedCuda) {
CHECK_RESULT2(cuInit(0), "Error initializing CUDA"); CHECK_RESULT2(cuInit(0), "Error initializing CUDA");
hasInitializedCuda = true; hasInitializedCuda = true;
...@@ -143,11 +144,17 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -143,11 +144,17 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, paddedNumAtoms*sizeof(double4), 0)); CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, paddedNumAtoms*sizeof(double4), 0));
posq = CudaArray::create<double4>(paddedNumAtoms, "posq"); posq = CudaArray::create<double4>(paddedNumAtoms, "posq");
velm = CudaArray::create<double4>(paddedNumAtoms, "velm"); velm = CudaArray::create<double4>(paddedNumAtoms, "velm");
compilationDefines["make_real2"] = "make_double2";
compilationDefines["make_real3"] = "make_double3";
compilationDefines["make_real4"] = "make_double4";
} }
else { else {
CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, paddedNumAtoms*sizeof(float4), 0)); CHECK_RESULT(cuMemHostAlloc(&pinnedBuffer, paddedNumAtoms*sizeof(float4), 0));
posq = CudaArray::create<float4>(paddedNumAtoms, "posq"); posq = CudaArray::create<float4>(paddedNumAtoms, "posq");
velm = CudaArray::create<float4>(paddedNumAtoms, "velm"); velm = CudaArray::create<float4>(paddedNumAtoms, "velm");
compilationDefines["make_real2"] = "make_float2";
compilationDefines["make_real3"] = "make_float3";
compilationDefines["make_real4"] = "make_float4";
} }
posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0)); posCellOffsets.resize(paddedNumAtoms, make_int4(0, 0, 0, 0));
...@@ -160,8 +167,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -160,8 +167,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers"); clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers");
clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers"); clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers"); clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers");
reduceFloat4Kernel = getKernel(utilities, "reduceFloat4Buffer");
reduceForcesKernel = getKernel(utilities, "reduceForces");
// Set defines based on the requested precision. // Set defines based on the requested precision.
...@@ -170,14 +175,21 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking ...@@ -170,14 +175,21 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines["RECIP"] = useDoublePrecision ? "1.0/" : "1.0f/"; compilationDefines["RECIP"] = useDoublePrecision ? "1.0/" : "1.0f/";
compilationDefines["EXP"] = useDoublePrecision ? "exp" : "expf"; compilationDefines["EXP"] = useDoublePrecision ? "exp" : "expf";
compilationDefines["LOG"] = useDoublePrecision ? "log" : "logf"; compilationDefines["LOG"] = useDoublePrecision ? "log" : "logf";
compilationDefines["COS"] = useDoublePrecision ? "cos" : "cosf";
compilationDefines["SIN"] = useDoublePrecision ? "sin" : "sinf";
compilationDefines["TAN"] = useDoublePrecision ? "tan" : "tanf";
compilationDefines["ACOS"] = useDoublePrecision ? "acos" : "acosf";
compilationDefines["ASIN"] = useDoublePrecision ? "asin" : "asinf";
compilationDefines["ATAN"] = useDoublePrecision ? "atan" : "atanf";
// Create the work thread used for parallelization when running on multiple devices. // Create the work thread used for parallelization when running on multiple devices.
thread = new WorkThread(); thread = new WorkThread();
//
// // Create the integration utilities object. // Create utilities objects.
//
// integration = new CudaIntegrationUtilities(*this, system); integration = new CudaIntegrationUtilities(*this, system);
expression = new CudaExpressionUtilities(*this);
} }
CudaContext::~CudaContext() { CudaContext::~CudaContext() {
...@@ -201,8 +213,10 @@ CudaContext::~CudaContext() { ...@@ -201,8 +213,10 @@ CudaContext::~CudaContext() {
// delete energyBuffer; // delete energyBuffer;
// if (atomIndex != NULL) // if (atomIndex != NULL)
// delete atomIndex; // delete atomIndex;
// if (integration != NULL) if (integration != NULL)
// delete integration; delete integration;
if (expression != NULL)
delete expression;
// if (bonded != NULL) // if (bonded != NULL)
// delete bonded; // delete bonded;
// if (nonbonded != NULL) // if (nonbonded != NULL)
...@@ -272,6 +286,18 @@ CUmodule CudaContext::createModule(const string source, const map<string, string ...@@ -272,6 +286,18 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
} }
if (!compilationDefines.empty()) if (!compilationDefines.empty())
src << endl; src << endl;
if (useDoublePrecision) {
src << "typedef double real;\n";
src << "typedef double2 real2;\n";
src << "typedef double3 real3;\n";
src << "typedef double4 real4;\n";
}
else {
src << "typedef float real;\n";
src << "typedef float2 real2;\n";
src << "typedef float3 real3;\n";
src << "typedef float4 real4;\n";
}
for (map<string, string>::const_iterator iter = defines.begin(); iter != defines.end(); ++iter) { for (map<string, string>::const_iterator iter = defines.begin(); iter != defines.end(); ++iter) {
src << "#define " << iter->first; src << "#define " << iter->first;
if (!iter->second.empty()) if (!iter->second.empty())
...@@ -498,22 +524,7 @@ void CudaContext::addAutoclearBuffer(CUdeviceptr memory, int size) { ...@@ -498,22 +524,7 @@ void CudaContext::addAutoclearBuffer(CUdeviceptr memory, int size) {
// clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]); // clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]);
// } // }
//} //}
//
//void CudaContext::reduceForces() {
// if (supports64BitGlobalAtomics)
// executeKernel(reduceForcesKernel, paddedNumAtoms, 128);
// else
// reduceBuffer(*forceBuffers, numForceBuffers);
//}
//
//void CudaContext::reduceBuffer(CudaArray<mm_float4>& array, int numBuffers) {
// int bufferSize = array.getSize()/numBuffers;
// reduceFloat4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
// reduceFloat4Kernel.setArg<cl_int>(1, bufferSize);
// reduceFloat4Kernel.setArg<cl_int>(2, numBuffers);
// executeKernel(reduceFloat4Kernel, bufferSize, 128);
//}
//
void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds) { void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMolecule, vector<vector<int> >& atomBonds) {
// Recursively tag atoms as belonging to a particular molecule. // Recursively tag atoms as belonging to a particular molecule.
......
...@@ -46,6 +46,7 @@ namespace OpenMM { ...@@ -46,6 +46,7 @@ namespace OpenMM {
class CudaArray; class CudaArray;
class CudaForceInfo; class CudaForceInfo;
class CudaExpressionUtilities;
class CudaIntegrationUtilities; class CudaIntegrationUtilities;
class CudaBondedUtilities; class CudaBondedUtilities;
class CudaNonbondedUtilities; class CudaNonbondedUtilities;
...@@ -216,25 +217,13 @@ public: ...@@ -216,25 +217,13 @@ public:
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation. * Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
* *
* @param memory the memory to clear * @param memory the memory to clear
* @param size the number of float/double elements in the buffer * @param size the number of 4-byte elements in the buffer
*/ */
void addAutoclearBuffer(CUdeviceptr memory, int size); void addAutoclearBuffer(CUdeviceptr memory, int size);
// /** // /**
// * Clear all buffers that have been registered with addAutoclearBuffer(). // * Clear all buffers that have been registered with addAutoclearBuffer().
// */ // */
// void clearAutoclearBuffers(); // void clearAutoclearBuffers();
// /**
// * Given a collection of buffers packed into an array, sum them and store
// * the sum in the first buffer.
// *
// * @param array the array containing the buffers to reduce
// * @param numBuffers the number of buffers packed into the array
// */
// void reduceBuffer(CudaArray<mm_float4>& array, int numBuffers);
// /**
// * Sum the buffesr containing forces.
// */
// void reduceForces();
/** /**
* Get the current simulation time. * Get the current simulation time.
*/ */
...@@ -341,12 +330,18 @@ public: ...@@ -341,12 +330,18 @@ public:
// float4 getInvPeriodicBoxSize() const { // float4 getInvPeriodicBoxSize() const {
// return invPeriodicBoxSize; // return invPeriodicBoxSize;
// } // }
// /** /**
// * Get the CudaIntegrationUtilities for this context. * Get the CudaIntegrationUtilities for this context.
// */ */
// CudaIntegrationUtilities& getIntegrationUtilities() { CudaIntegrationUtilities& getIntegrationUtilities() {
// return *integration; return *integration;
// } }
/**
* Get the CudaExpressionUtilities for this context.
*/
CudaExpressionUtilities& getExpressionUtilities() {
return *expression;
}
// /** // /**
// * Get the CudaBondedUtilities for this context. // * Get the CudaBondedUtilities for this context.
// */ // */
...@@ -445,8 +440,6 @@ private: ...@@ -445,8 +440,6 @@ private:
CUfunction clearFourBuffersKernel; CUfunction clearFourBuffersKernel;
CUfunction clearFiveBuffersKernel; CUfunction clearFiveBuffersKernel;
CUfunction clearSixBuffersKernel; CUfunction clearSixBuffersKernel;
CUfunction reduceFloat4Kernel;
CUfunction reduceForcesKernel;
std::vector<CudaForceInfo*> forces; std::vector<CudaForceInfo*> forces;
std::vector<Molecule> molecules; std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups; std::vector<MoleculeGroup> moleculeGroups;
...@@ -461,7 +454,8 @@ private: ...@@ -461,7 +454,8 @@ private:
std::vector<CUdeviceptr> autoclearBuffers; std::vector<CUdeviceptr> autoclearBuffers;
std::vector<int> autoclearBufferSizes; std::vector<int> autoclearBufferSizes;
std::vector<ReorderListener*> reorderListeners; std::vector<ReorderListener*> reorderListeners;
// CudaIntegrationUtilities* integration; CudaIntegrationUtilities* integration;
CudaExpressionUtilities* expression;
// CudaBondedUtilities* bonded; // CudaBondedUtilities* bonded;
// CudaNonbondedUtilities* nonbonded; // CudaNonbondedUtilities* nonbonded;
WorkThread* thread; WorkThread* thread;
......
...@@ -33,19 +33,6 @@ using namespace OpenMM; ...@@ -33,19 +33,6 @@ using namespace OpenMM;
using namespace Lepton; using namespace Lepton;
using namespace std; using namespace std;
string CudaExpressionUtilities::doubleToString(double value) {
stringstream s;
s.precision(8);
s << scientific << value << "f";
return s.str();
}
string CudaExpressionUtilities::intToString(int value) {
stringstream s;
s << value;
return s.str();
}
string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables, string CudaExpressionUtilities::createExpressions(const map<string, ParsedExpression>& expressions, const map<string, string>& variables,
const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) { const vector<pair<string, string> >& functions, const string& prefix, const string& functionParams, const string& tempType) {
vector<pair<ExpressionTreeNode, string> > variableNodes; vector<pair<ExpressionTreeNode, string> > variableNodes;
...@@ -75,13 +62,13 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -75,13 +62,13 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
return; return;
for (int i = 0; i < (int) node.getChildren().size(); i++) for (int i = 0; i < (int) node.getChildren().size(); i++)
processExpression(out, node.getChildren()[i], temps, functions, prefix, functionParams, allExpressions, tempType); processExpression(out, node.getChildren()[i], temps, functions, prefix, functionParams, allExpressions, tempType);
string name = prefix+intToString(temps.size()); string name = prefix+context.intToString(temps.size());
bool hasRecordedNode = false; bool hasRecordedNode = false;
out << tempType << " " << name << " = "; out << tempType << " " << name << " = ";
switch (node.getOperation().getId()) { switch (node.getOperation().getId()) {
case Operation::CONSTANT: case Operation::CONSTANT:
out << doubleToString(dynamic_cast<const Operation::Constant*>(&node.getOperation())->getValue()); out << context.doubleToString(dynamic_cast<const Operation::Constant*>(&node.getOperation())->getValue());
break; break;
case Operation::VARIABLE: case Operation::VARIABLE:
throw OpenMMException("Unknown variable in expression: "+node.getOperation().getName()); throw OpenMMException("Unknown variable in expression: "+node.getOperation().getName());
...@@ -107,7 +94,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -107,7 +94,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
string valueName = name; string valueName = name;
string derivName = name; string derivName = name;
if (valueNode != NULL && derivNode != NULL) { if (valueNode != NULL && derivNode != NULL) {
string name2 = prefix+intToString(temps.size()); string name2 = prefix+context.intToString(temps.size());
out << tempType << " " << name2 << " = 0.0f;\n"; out << tempType << " " << name2 << " = 0.0f;\n";
if (isDeriv) { if (isDeriv) {
valueName = name2; valueName = name2;
...@@ -120,14 +107,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -120,14 +107,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
} }
out << "{\n"; out << "{\n";
out << "float4 params = " << functionParams << "[" << i << "];\n"; out << "float4 params = " << functionParams << "[" << i << "];\n";
out << "float x = " << getTempName(node.getChildren()[0], temps) << ";\n"; out << "real x = " << getTempName(node.getChildren()[0], temps) << ";\n";
out << "if (x >= params.x && x <= params.y) {\n"; out << "if (x >= params.x && x <= params.y) {\n";
out << "x = (x-params.x)*params.z;\n"; out << "x = (x-params.x)*params.z;\n";
out << "int index = (int) (floor(x));\n"; out << "int index = (int) (floor(x));\n";
out << "index = min(index, (int) params.w);\n"; out << "index = min(index, (int) params.w);\n";
out << "float4 coeff = " << functions[i].second << "[index];\n"; out << "float4 coeff = " << functions[i].second << "[index];\n";
out << "float b = x-index;\n"; out << "real b = x-index;\n";
out << "float a = 1.0f-b;\n"; out << "real a = 1.0f-b;\n";
if (valueNode != NULL) if (valueNode != NULL)
out << valueName << " = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(params.z*params.z);\n"; out << valueName << " = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(params.z*params.z);\n";
if (derivNode != NULL) if (derivNode != NULL)
...@@ -164,7 +151,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -164,7 +151,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "-" << getTempName(node.getChildren()[0], temps); out << "-" << getTempName(node.getChildren()[0], temps);
break; break;
case Operation::SQRT: case Operation::SQRT:
out << "sqrt(" << getTempName(node.getChildren()[0], temps) << ")"; out << "SQRT(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::EXP: case Operation::EXP:
out << "EXP(" << getTempName(node.getChildren()[0], temps) << ")"; out << "EXP(" << getTempName(node.getChildren()[0], temps) << ")";
...@@ -173,31 +160,31 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -173,31 +160,31 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "LOG(" << getTempName(node.getChildren()[0], temps) << ")"; out << "LOG(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::SIN: case Operation::SIN:
out << "sin(" << getTempName(node.getChildren()[0], temps) << ")"; out << "SIN(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::COS: case Operation::COS:
out << "cos(" << getTempName(node.getChildren()[0], temps) << ")"; out << "COS(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::SEC: case Operation::SEC:
out << "1.0f/cos(" << getTempName(node.getChildren()[0], temps) << ")"; out << "RECIP(COS(" << getTempName(node.getChildren()[0], temps) << "))";
break; break;
case Operation::CSC: case Operation::CSC:
out << "1.0f/sin(" << getTempName(node.getChildren()[0], temps) << ")"; out << "RECIP(SIN(" << getTempName(node.getChildren()[0], temps) << "))";
break; break;
case Operation::TAN: case Operation::TAN:
out << "tan(" << getTempName(node.getChildren()[0], temps) << ")"; out << "TAN(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::COT: case Operation::COT:
out << "1.0f/tan(" << getTempName(node.getChildren()[0], temps) << ")"; out << "RECIP(TAN(" << getTempName(node.getChildren()[0], temps) << "))";
break; break;
case Operation::ASIN: case Operation::ASIN:
out << "asin(" << getTempName(node.getChildren()[0], temps) << ")"; out << "ASIN(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::ACOS: case Operation::ACOS:
out << "acos(" << getTempName(node.getChildren()[0], temps) << ")"; out << "ACSO(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::ATAN: case Operation::ATAN:
out << "atan(" << getTempName(node.getChildren()[0], temps) << ")"; out << "ATAN(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::SINH: case Operation::SINH:
out << "sinh(" << getTempName(node.getChildren()[0], temps) << ")"; out << "sinh(" << getTempName(node.getChildren()[0], temps) << ")";
...@@ -236,10 +223,10 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -236,10 +223,10 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "RECIP(" << getTempName(node.getChildren()[0], temps) << ")"; out << "RECIP(" << getTempName(node.getChildren()[0], temps) << ")";
break; break;
case Operation::ADD_CONSTANT: case Operation::ADD_CONSTANT:
out << doubleToString(dynamic_cast<const Operation::AddConstant*>(&node.getOperation())->getValue()) << "+" << getTempName(node.getChildren()[0], temps); out << context.doubleToString(dynamic_cast<const Operation::AddConstant*>(&node.getOperation())->getValue()) << "+" << getTempName(node.getChildren()[0], temps);
break; break;
case Operation::MULTIPLY_CONSTANT: case Operation::MULTIPLY_CONSTANT:
out << doubleToString(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()) << "*" << getTempName(node.getChildren()[0], temps); out << context.doubleToString(dynamic_cast<const Operation::MultiplyConstant*>(&node.getOperation())->getValue()) << "*" << getTempName(node.getChildren()[0], temps);
break; break;
case Operation::POWER_CONSTANT: case Operation::POWER_CONSTANT:
{ {
...@@ -266,14 +253,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -266,14 +253,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
for (map<int, const ExpressionTreeNode*>::const_iterator iter = powers.begin(); iter != powers.end(); ++iter) { for (map<int, const ExpressionTreeNode*>::const_iterator iter = powers.begin(); iter != powers.end(); ++iter) {
if (iter->first != exponent) { if (iter->first != exponent) {
exponents.push_back(iter->first >= 0 ? iter->first : -iter->first); exponents.push_back(iter->first >= 0 ? iter->first : -iter->first);
string name2 = prefix+intToString(temps.size()); string name2 = prefix+context.intToString(temps.size());
names.push_back(name2); names.push_back(name2);
temps.push_back(make_pair(*iter->second, name2)); temps.push_back(make_pair(*iter->second, name2));
out << tempType << " " << name2 << " = 0.0f;\n"; out << tempType << " " << name2 << " = 0.0f;\n";
} }
} }
out << "{\n"; out << "{\n";
out << "float multiplier = " << (exponent < 0.0 ? "1.0f/" : "") << getTempName(node.getChildren()[0], temps) << ";\n"; out << "real multiplier = " << (exponent < 0.0 ? "RECIP(" : "(") << getTempName(node.getChildren()[0], temps) << ");\n";
bool done = false; bool done = false;
while (!done) { while (!done) {
done = true; done = true;
...@@ -295,7 +282,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express ...@@ -295,7 +282,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out << "}"; out << "}";
} }
else else
out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << doubleToString(exponent) << ")"; out << "pow(" << getTempName(node.getChildren()[0], temps) << ", " << context.doubleToString(exponent) << ")";
break; break;
} }
case Operation::MIN: case Operation::MIN:
......
...@@ -45,6 +45,8 @@ namespace OpenMM { ...@@ -45,6 +45,8 @@ namespace OpenMM {
class OPENMM_EXPORT CudaExpressionUtilities { class OPENMM_EXPORT CudaExpressionUtilities {
public: public:
CudaExpressionUtilities(CudaContext& context) : context(context) {
}
/** /**
* Generate the source code for calculating a set of expressions. * Generate the source code for calculating a set of expressions.
* *
...@@ -54,10 +56,10 @@ public: ...@@ -54,10 +56,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions * @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables * @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function * @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float") * @param tempType the type of value to use for temporary variables (defaults to "real")
*/ */
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables, std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::map<std::string, std::string>& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="float"); const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="real");
/** /**
* Generate the source code for calculating a set of expressions. * Generate the source code for calculating a set of expressions.
* *
...@@ -67,10 +69,10 @@ public: ...@@ -67,10 +69,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions * @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables * @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function * @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "float") * @param tempType the type of value to use for temporary variables (defaults to "real")
*/ */
static std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables, std::string createExpressions(const std::map<std::string, Lepton::ParsedExpression>& expressions, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& variables,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="float"); const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::string& tempType="real");
/** /**
* Calculate the spline coefficients for a tabulated function that appears in expressions. * Calculate the spline coefficients for a tabulated function that appears in expressions.
* *
...@@ -79,26 +81,19 @@ public: ...@@ -79,26 +81,19 @@ public:
* @param max the value of the independent variable corresponding to the last element of values * @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients * @return the spline coefficients
*/ */
static std::vector<float4> computeFunctionCoefficients(const std::vector<double>& values, double min, double max); std::vector<float4> computeFunctionCoefficients(const std::vector<double>& values, double min, double max);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static std::string doubleToString(double value);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static std::string intToString(int value);
class FunctionPlaceholder; class FunctionPlaceholder;
private: private:
static void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node, void processExpression(std::stringstream& out, const Lepton::ExpressionTreeNode& node,
std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps, std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps,
const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams, const std::vector<std::pair<std::string, std::string> >& functions, const std::string& prefix, const std::string& functionParams,
const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType); const std::vector<Lepton::ParsedExpression>& allExpressions, const std::string& tempType);
static std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps); std::string getTempName(const Lepton::ExpressionTreeNode& node, const std::vector<std::pair<Lepton::ExpressionTreeNode, std::string> >& temps);
static void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode, void findRelatedTabulatedFunctions(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
const Lepton::ExpressionTreeNode*& valueNode, const Lepton::ExpressionTreeNode*& derivNode); const Lepton::ExpressionTreeNode*& valueNode, const Lepton::ExpressionTreeNode*& derivNode);
static void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode, void findRelatedPowers(const Lepton::ExpressionTreeNode& node, const Lepton::ExpressionTreeNode& searchNode,
std::map<int, const Lepton::ExpressionTreeNode*>& powers); std::map<int, const Lepton::ExpressionTreeNode*>& powers);
CudaContext& context;
}; };
/** /**
......
This diff is collapsed.
#ifndef OPENMM_CUDAINTEGRATIONUTILITIES_H_
#define OPENMM_CUDAINTEGRATIONUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/System.h"
#include "CudaContext.h"
#include "openmm/internal/windowsExport.h"
#include <iosfwd>
namespace OpenMM {
/**
* This class implements features that are used by many different integrators, including
* common workspace arrays, random number generation, and enforcing constraints.
* It also declares operations for virtual sites and for saving/restoring the state of
* the random number generator through checkpoints.
*
* NOTE(review): the class stores raw CudaArray pointers and declares a destructor, but no
* copy constructor or assignment operator is declared here. Presumably instances are never
* copied -- confirm against the implementation file.
*/
class OPENMM_EXPORT CudaIntegrationUtilities {
public:
/**
* Create the utilities for a context.
*
* @param context the CudaContext these utilities operate on
* @param system the System being simulated (how it is used is defined in the
* implementation file, not in this header)
*/
CudaIntegrationUtilities(CudaContext& context, const System& system);
/**
* Destructor. Defined in the implementation file; presumably it releases the arrays
* referenced by this object -- TODO confirm.
*/
~CudaIntegrationUtilities();
/**
* Get the array which contains position deltas.
*
* The internal pointer is dereferenced without a check, so the array must already exist.
*/
CudaArray& getPosDelta() {
return *posDelta;
}
/**
* Get the array which contains random values. Each element is a float4, whose components
* are independent, normally distributed random numbers with mean 0 and variance 1.
*
* The internal pointer is dereferenced without a check, so the array must already exist.
*/
CudaArray& getRandom() {
return *random;
}
/**
* Get the array which contains the current step size.
*
* The internal pointer is dereferenced without a check, so the array must already exist.
*/
CudaArray& getStepSize() {
return *stepSize;
}
/**
* Apply constraints to the atom positions.
*
* @param tol the constraint tolerance
*/
void applyConstraints(double tol);
/**
* Apply constraints to the atom velocities.
*
* @param tol the constraint tolerance
*/
void applyVelocityConstraints(double tol);
/**
* Initialize the random number generator.
*
* @param randomNumberSeed the seed value to initialize the generator with
*/
void initRandomNumberGenerator(unsigned int randomNumberSeed);
/**
* Ensure that sufficient random numbers are available in the array, and generate new ones if not.
*
* @param numValues the number of random float4's that will be required
* @return the index in the array at which to start reading
*/
int prepareRandomNumbers(int numValues);
/**
* Compute the positions of virtual sites.
*/
void computeVirtualSites();
/**
* Distribute forces from virtual sites to the atoms they are based on.
*/
void distributeForcesFromVirtualSites();
/**
* Create a checkpoint recording the current state of the random number generator.
*
* @param stream an output stream the checkpoint data should be written to
*/
void createCheckpoint(std::ostream& stream);
/**
* Load a checkpoint that was written by createCheckpoint().
*
* @param stream an input stream the checkpoint data should be read from
*/
void loadCheckpoint(std::istream& stream);
private:
/**
* Private overload taking an extra flag. Presumably the shared implementation behind
* applyConstraints(double) and applyVelocityConstraints(double) -- TODO confirm.
*
* @param constrainVelocities presumably selects velocity constraints when true and
* position constraints when false -- TODO confirm
* @param tol the constraint tolerance
*/
void applyConstraints(bool constrainVelocities, double tol);
// The context passed to the constructor.
CudaContext& context;
// Handles to CUDA kernel functions. NOTE(review): the names suggest SETTLE, SHAKE and CCMA
// constraint kernels (position and velocity variants), virtual site kernels, and the
// random number kernel; the code that loads them is not part of this header.
CUfunction settlePosKernel, settleVelKernel;
CUfunction shakePosKernel, shakeVelKernel;
CUfunction ccmaDirectionsKernel;
CUfunction ccmaPosForceKernel, ccmaVelForceKernel;
CUfunction ccmaMultiplyKernel;
CUfunction ccmaPosUpdateKernel, ccmaVelUpdateKernel;
CUfunction vsitePositionKernel, vsiteForceKernel;
CUfunction randomKernel;
// Array returned by getPosDelta().
CudaArray* posDelta;
// NOTE(review): presumably the atom indices and parameters for the SETTLE and SHAKE
// constraint algorithms -- confirm against the implementation file.
CudaArray* settleAtoms;
CudaArray* settleParams;
CudaArray* shakeAtoms;
CudaArray* shakeParams;
// Array returned by getRandom().
CudaArray* random;
// NOTE(review): presumably the per-thread generator state used to fill `random` -- confirm.
CudaArray* randomSeed;
// Array returned by getStepSize().
CudaArray* stepSize;
// NOTE(review): presumably workspace for the CCMA constraint algorithm -- confirm.
CudaArray* ccmaAtoms;
CudaArray* ccmaDistance;
CudaArray* ccmaReducedMass;
CudaArray* ccmaAtomConstraints;
CudaArray* ccmaNumAtomConstraints;
CudaArray* ccmaConstraintMatrixColumn;
CudaArray* ccmaConstraintMatrixValue;
CudaArray* ccmaDelta1;
CudaArray* ccmaDelta2;
CudaArray* ccmaConverged;
// Plain int pointer rather than a CudaArray. NOTE(review): presumably memory used to read
// the CCMA convergence flag; how it is allocated is not shown in this header.
int* ccmaConvergedMemory;
// NOTE(review): presumably atom indices and weights for each supported kind of virtual
// site (two-particle average, three-particle average, out-of-plane) -- confirm.
CudaArray* vsite2AvgAtoms;
CudaArray* vsite2AvgWeights;
CudaArray* vsite3AvgAtoms;
CudaArray* vsite3AvgWeights;
CudaArray* vsiteOutOfPlaneAtoms;
CudaArray* vsiteOutOfPlaneWeights;
// NOTE(review): presumably the position of the next unused element of `random` -- confirm.
int randomPos;
// NOTE(review): presumably the most recent seed given to initRandomNumberGenerator() and
// the number of virtual sites in the System -- confirm.
int lastSeed, numVsites;
// NOTE(review): presumably record whether the position/velocity constraint kernels have
// been set up yet -- confirm.
bool hasInitializedPosConstraintKernels, hasInitializedVelConstraintKernels;
// Helper types that are only forward-declared here; their definitions live elsewhere.
struct ShakeCluster;
struct ConstraintOrderer;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAINTEGRATIONUTILITIES_H_*/
/**
 * Advance the generator held in (state, carry) by one step and return the next
 * 32-bit value.
 *
 * Three independent recurrences are stepped: a multiply-and-add update of
 * state.x, a shift/xor update of state.y, and an update of state.z and state.w
 * that feeds and refreshes carry.  The result is the (wrapping) sum
 * state.x + state.y + state.w.
 */
inline __device__ unsigned int advanceRandomState(uint4& state, unsigned int& carry) {
    state.x = state.x * 69069 + 1;
    state.y ^= state.y << 13;
    state.y ^= state.y >> 17;
    state.y ^= state.y << 5;
    const unsigned int k = (state.z >> 2) + (state.w >> 3) + (carry >> 2);
    const unsigned int m = state.w + state.w + state.z + carry;
    state.z = state.w;
    state.w = m;
    carry = k >> 30;
    return state.x + state.y + state.w;
}

/**
 * Produce one normally distributed value from two successive generator steps,
 * using the cosine form of the Box-Muller transform.
 */
inline __device__ float generateNormalValue(uint4& state, unsigned int& carry) {
    // The first draw is clamped to at least 1 so the logarithm never receives zero.
    const float radiusDraw = (float)max(advanceRandomState(state, carry), 0x00000001u) / (float)0xffffffff;
    const float radius = sqrtf(-2.0f * logf(radiusDraw));
    const float angleDraw = (float)advanceRandomState(state, carry) / (float)0xffffffff;
    return radius * cosf(2.0f * 3.14159265f * angleDraw);
}

/**
 * Generate random numbers.
 *
 * Fills random[0..numValues) with float4 elements whose four components are each
 * produced by generateNormalValue().  Every thread loads its own generator state
 * from seed[], walks the output with a grid-stride loop, and stores the advanced
 * state back on exit so that the next launch continues the sequence.
 *
 * seed[] is read and written at the global thread index regardless of numValues,
 * so it must hold at least one element per launched thread.
 *
 * @param numValues the number of float4 elements to generate
 * @param random    the output array
 * @param seed      the per-thread generator state (read and updated)
 */
extern "C" __global__ void generateRandomNumbers(int numValues, float4* __restrict__ random, uint4* __restrict__ seed) {
    const int threadIndex = blockIdx.x*blockDim.x+threadIdx.x;
    uint4 state = seed[threadIndex];
    unsigned int carry = 0;
    for (int index = threadIndex; index < numValues; index += blockDim.x*gridDim.x) {
        float4 value;
        value.x = generateNormalValue(state, carry);
        value.y = generateNormalValue(state, carry);
        value.z = generateNormalValue(state, carry);
        value.w = generateNormalValue(state, carry);
        random[index] = value;
    }
    seed[threadIndex] = state;
}
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of random number generation.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "../src/CudaArray.h"
#include "../src/CudaContext.h"
#include "../src/CudaIntegrationUtilities.h"
#include "openmm/System.h"
#include <iostream>
using namespace OpenMM;
using namespace std;
/**
 * Fill the context's random number array once, download it, and check that the
 * first four cumulants of the values are consistent with a normal distribution
 * of mean 0 and variance 1.
 */
void testGaussian() {
    int numAtoms = 5000;
    System system;
    for (int i = 0; i < numAtoms; i++)
        system.addParticle(1.0);
    CudaPlatform platform;
    CudaPlatform::PlatformData platformData(system, "", "true", "single",
            platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()), platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
    CudaContext& context = *platformData.contexts[0];
    context.initialize();
    context.getIntegrationUtilities().initRandomNumberGenerator(0);
    CudaArray& random = context.getIntegrationUtilities().getRandom();
    context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());

    // The array holds getSize() float4 elements, i.e. four scalar values each.  The host
    // buffer is sized in float4 elements so that it matches the array exactly, rather than
    // relying on download() to shrink an oversized vector.
    const int numVectors = random.getSize();
    const int numValues = numVectors*4;
    vector<float4> values(numVectors);
    random.download(values);
    float* data = reinterpret_cast<float*>(&values[0]);

    // Accumulate the first four raw moments.
    double mean = 0.0;
    double var = 0.0;
    double skew = 0.0;
    double kurtosis = 0.0;
    for (int i = 0; i < numValues; i++) {
        double value = data[i];
        mean += value;
        var += value*value;
        skew += value*value*value;
        kurtosis += value*value*value*value;
    }
    mean /= numValues;
    var /= numValues;
    skew /= numValues;
    kurtosis /= numValues;

    // Convert the raw moments to cumulants: for a standard normal distribution these
    // should be 0, 1, 0, 0.
    double c2 = var-mean*mean;
    double c3 = skew-3*var*mean+2*mean*mean*mean;
    double c4 = kurtosis-4*skew*mean-3*var*var+12*var*mean*mean-6*mean*mean*mean*mean;
    ASSERT_EQUAL_TOL(0.0, mean, 3.0/sqrt((double)numValues));
    ASSERT_EQUAL_TOL(1.0, c2, 3.0/pow(numValues, 1.0/3.0));
    ASSERT_EQUAL_TOL(0.0, c3, 3.0/pow(numValues, 1.0/4.0));
    ASSERT_EQUAL_TOL(0.0, c4, 3.0/pow(numValues, 1.0/4.0));
}
int main() {
try {
testGaussian();
}
catch(const exception& e) {
cout << "exception: " << e.what() << endl;
return 1;
}
cout << "Done" << endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment